# Copyright 2018 AT&T Intellectual Property.  All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from deepdiff import DeepDiff
|
|
from deepdiff.helper import RemapDict
|
|
import jsonpickle
|
|
|
|
from deckhand.control import common
|
|
from deckhand.db.sqlalchemy import api as db_api
|
|
from deckhand.engine import utils
|
|
from deckhand import errors
|
|
|
|
|
|
def revision_diff(revision_id, comparison_revision_id, deepdiff=False):
    """Generate the diff between two revisions.

    Generate the diff between the two revisions: `revision_id` and
    `comparison_revision_id`.

    a. When deepdiff=False: A basic comparison of the revisions in terms of
       how the buckets involved have changed is generated. Only buckets with
       existing documents in either of the two revisions in question will be
       reported.
    b. When deepdiff=True: Along with the basic comparison, a deep diff is
       generated for each bucket that was modified between the revisions.

    For the basic diff only, the ordering of the two revision IDs is
    interchangeable, i.e. no matter the order, the same result is generated.

    The differences include:

        - "created": A bucket has been created between the revisions.
        - "deleted": A bucket has been deleted between the revisions.
        - "modified": A bucket has been modified between the revisions.
                      When deepdiff is enabled, the result also includes
                      the deep difference between the revisions under the
                      key "<bucket name> diff".
        - "unmodified": A bucket remains unmodified between the revisions.

    :param revision_id: ID of the first revision.
    :param comparison_revision_id: ID of the second revision.
    :param deepdiff: Whether a deep diff is needed or not.
    :returns: A dictionary, keyed with the bucket IDs, containing any of the
        differences enumerated above.

    Examples Diff::

        # GET /api/v1.0/revisions/6/diff/3
        bucket_a: created
        bucket_b: deleted
        bucket_c: modified
        bucket_d: unmodified

        # GET /api/v1.0/revisions/0/diff/6
        bucket_a: created
        bucket_c: created
        bucket_d: created

        # GET /api/v1.0/revisions/6/diff/6
        bucket_a: unmodified
        bucket_c: unmodified
        bucket_d: unmodified

        # GET /api/v1.0/revisions/0/diff/0
        {}

    Examples DeepDiff::

        # GET /api/v1.0/revisions/3/deepdiff/4
        bucket_a: modified
        bucket_a diff:
          document_changed:
            count: 1
            details:
              ('example/Kind/v1', 'doc-b'):
                data_changed:
                  values_changed:
                    root['foo']: {new_value: 3, old_value: 2}
                metadata_changed: {}

        # GET /api/v1.0/revisions/2/deepdiff/3
        bucket_a: modified
        bucket_a diff:
          document_added:
            count: 1
            details:
            - [example/Kind/v1, doc-c]

        # GET /api/v1.0/revisions/0/deepdiff/0
        {}

        # GET /api/v1.0/revisions/0/deepdiff/3
        bucket_a: created
    """
    if deepdiff:
        # Deep diffing compares rendered documents. Revision 0 doesn't
        # exist, so it is treated as having no documents.
        docs = _rendered_doc(revision_id) if revision_id != 0 else []
        comparison_docs = (_rendered_doc(comparison_revision_id)
                           if comparison_revision_id != 0 else [])
    else:
        # Retrieve document history for each revision. Since `revision_id` of 0
        # doesn't exist, treat it as a special case: empty list.
        docs = (db_api.revision_documents_get(revision_id,
                                              include_history=True,
                                              unique_only=False)
                if revision_id != 0 else [])
        comparison_docs = (db_api.revision_documents_get(
            comparison_revision_id,
            include_history=True,
            unique_only=False
        ) if comparison_revision_id != 0 else [])

    # Remove each deleted document and its older counterparts because those
    # documents technically don't exist.
    # NOTE(review): indentation was lost in the extracted source; this step is
    # placed after the if/else so that it applies to both paths -- confirm
    # against the upstream file.
    docs = utils.exclude_deleted_documents(docs)
    comparison_docs = utils.exclude_deleted_documents(comparison_docs)

    revision = db_api.revision_get(revision_id) if revision_id != 0 else None
    comparison_revision = (db_api.revision_get(comparison_revision_id)
                           if comparison_revision_id != 0 else None)

    # Each dictionary below, keyed with the bucket's name, references the list
    # of documents related to each bucket.
    buckets = {}
    comparison_buckets = {}
    for doc in docs:
        buckets.setdefault(doc['bucket_name'], []).append(doc)
    for doc in comparison_docs:
        comparison_buckets.setdefault(doc['bucket_name'], []).append(doc)

    # `shared_buckets` references buckets shared by both `revision_id` and
    # `comparison_revision_id` -- i.e. their intersection.
    shared_buckets = set(buckets).intersection(comparison_buckets)
    # `unshared_buckets` references buckets present in exactly one of
    # `revision_id` and `comparison_revision_id` -- i.e. their symmetric
    # difference.
    unshared_buckets = set(buckets).symmetric_difference(comparison_buckets)

    result = {}

    def _compare_buckets(b1, b2):
        # Checks whether buckets' documents are identical, irrespective of
        # the order in which the documents are listed.
        return (sorted((d['data_hash'], d['metadata_hash']) for d in b1) ==
                sorted((d['data_hash'], d['metadata_hash']) for d in b2))

    # If the list of documents for each bucket is identical, then the result
    # is "unmodified", else "modified".
    for bucket_name in shared_buckets:
        unmodified = _compare_buckets(buckets[bucket_name],
                                      comparison_buckets[bucket_name])
        if unmodified:
            result[bucket_name] = 'unmodified'
        else:
            result[bucket_name] = 'modified'
            # With deepdiff enabled, also report what changed inside the
            # bucket under a separate "<bucket name> diff" key.
            if deepdiff:
                bucket_diff = _diff_buckets(buckets[bucket_name],
                                            comparison_buckets[bucket_name])
                result[bucket_name + ' diff'] = bucket_diff

    for bucket_name in unshared_buckets:
        # If neither revision has documents, then there's nothing to compare.
        # This is always True for revision_id == comparison_revision_id == 0.
        if not any([revision, comparison_revision]):
            break
        # Else if one revision == 0 and the other revision != 0, then the
        # bucket has been created. Which is zero or non-zero doesn't matter.
        elif not all([revision, comparison_revision]):
            result[bucket_name] = 'created'
        # Else if `revision` is newer than `comparison_revision`, then if the
        # `bucket_name` isn't in the `revision` buckets, then it has been
        # deleted. Otherwise it has been created.
        elif revision['created_at'] > comparison_revision['created_at']:
            if bucket_name not in buckets:
                result[bucket_name] = 'deleted'
            elif bucket_name not in comparison_buckets:
                result[bucket_name] = 'created'
        # Else if `comparison_revision` is newer than `revision`, then if the
        # `bucket_name` isn't in the `revision` buckets, then it has been
        # created. Otherwise it has been deleted.
        else:
            if bucket_name not in buckets:
                result[bucket_name] = 'created'
            elif bucket_name not in comparison_buckets:
                result[bucket_name] = 'deleted'

    return result
|
|
|
|
|
|
def _diff_buckets(b1, b2):
    """Provide the deep diff between two versions of the same bucket.

    Documents are matched across the two lists by their
    ``(schema, name)`` pair.

    :param b1: List of documents of the bucket in the first revision.
    :param b2: List of documents of the bucket in the second revision.
    :returns: A dictionary that may contain the keys:

        - "document_added": documents only present in ``b2``.
        - "document_deleted": documents only present in ``b1``.
        - "document_changed": documents present in both whose data or
          metadata hash differs.

        Each entry holds a "count" and "details". For added/deleted documents
        the details are a list of ``(schema, name)`` pairs; for changed
        documents they map ``str((schema, name))`` to the "data_changed" and
        "metadata_changed" deep diffs. A key is omitted when it has no
        entries.
    :raises errors.DeepDiffException: If computing or decoding a deep diff
        fails.
    """
    def _doc_key(doc):
        return (doc['schema'], doc['name'])

    def _deep_diff(old, new):
        # DeepDiff returns its own result object, so serialize it to JSON
        # and decode that back into plain containers.
        try:
            return jsonpickle.decode(DeepDiff(old, new).json)
        # deepdiff doesn't provide custom exceptions;
        # have to use Exception.
        except Exception as ex:
            raise errors.DeepDiffException(details=str(ex))

    old_docs = {_doc_key(d): d for d in b1}
    new_docs = {_doc_key(d): d for d in b2}

    # Walk the dictionaries instead of subtracting sets so that the reported
    # order follows the input documents rather than arbitrary set order.
    doc_deleted = [k for k in old_docs if k not in new_docs]
    doc_added = [k for k in new_docs if k not in old_docs]

    diff_result = {}
    if doc_added:
        diff_result['document_added'] = {
            'count': len(doc_added),
            'details': doc_added}
    if doc_deleted:
        diff_result['document_deleted'] = {
            'count': len(doc_deleted),
            'details': doc_deleted}

    # Find modified documents: present on both sides with differing hashes.
    change_details = {}
    for key, old_doc in old_docs.items():
        if key not in new_docs:
            continue
        new_doc = new_docs[key]
        if ((old_doc['data_hash'], old_doc['metadata_hash']) ==
                (new_doc['data_hash'], new_doc['metadata_hash'])):
            continue

        # Only show the data diff when neither side is encrypted. A data
        # diff carries both the old and the new values, so checking just
        # the newer document would expose the older document's encrypted
        # data whenever a document goes from encrypted to cleartext.
        data_changed = {'encrypted': True}
        if not (new_doc.is_encrypted or old_doc.is_encrypted):
            data_changed = _deep_diff(old_doc['data'], new_doc['data'])
        metadata_changed = _deep_diff(old_doc['metadata'],
                                      new_doc['metadata'])

        change_details[str(key)] = {'data_changed': data_changed,
                                    'metadata_changed': metadata_changed}

    if change_details:
        diff_result['document_changed'] = {
            'count': len(change_details),
            'details': change_details}

    # yaml_safedump fails to parse some python objects, so the diff result
    # needs formatting before it is passed to yaml_safedump.
    return _format_diff_result(diff_result)
|
|
|
|
|
|
def _format_diff_result(dr):
    """Format a diff result, in place, so that yaml_safedump can parse it.

    Walks nested dictionaries and replaces values that a safe YAML dump
    cannot represent:

    - instances of ``dict`` subclasses (such as deepdiff's ``RemapDict``)
      become plain ``dict`` objects;
    - class objects (e.g. ``int``) become their ``str()`` form.

    Values stored inside lists are left untouched.

    :param dr: The diff result. Anything that is not a dictionary is
        returned unchanged.
    :returns: The same ``dr`` object, updated in place.
    """
    if isinstance(dr, dict):
        # Only values of existing keys are replaced, so the dictionary's
        # size never changes while it is being iterated.
        for k, v in dr.items():
            # An exact type check is intended here: plain dicts are already
            # fine, only subclass instances need converting.
            if isinstance(v, dict) and type(v) is not dict:
                v = dict(v)
                dr[k] = v
            elif isinstance(v, type):
                dr[k] = str(v)
            if isinstance(v, dict):
                _format_diff_result(v)
    return dr
|
|
|
|
|
|
def _rendered_doc(revision_id):
    """Provide the rendered documents for the given revision ID.

    :param revision_id: ID of the revision whose documents are rendered.
    :returns: The first element of the pair returned by
        ``common.get_rendered_docs``, called with a ``deleted=False`` filter.
    """
    # Only the rendered documents are needed here; the second element of the
    # returned pair is ignored.
    rendered_documents, _ = common.get_rendered_docs(revision_id,
                                                     deleted=False)
    return rendered_documents
|