Optimize runtime for excluding deleted documents

Optimize the runtime for excluding deleted documents in Deckhand's
database layer. Currently the runtime is O(N^2): for each deleted
document, the whole list of documents is scanned, and every document
whose creation date is no later than the deleted document's deletion
date and whose schema and metadata.name match is removed (in other
words, every document from an earlier revision is removed if the same
document was deleted in a more recent revision).

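The runtime is now O(N log N): the documents are sorted by creation
date and then processed in a single pass using a dictionary keyed by
(schema, metadata.name).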

Change-Id: I4aa4e1429014731751288861735c705e6b6c6ed4
This commit is contained in:
Felipe Monteiro 2018-01-20 22:45:03 -05:00
parent c418e5f5ad
commit 18704ff74d
1 changed files with 17 additions and 15 deletions

View File

@ -577,20 +577,22 @@ def revision_delete_all():
def _exclude_deleted_documents(documents):
"""Excludes all documents with ``deleted=True`` field including all
documents earlier in the revision history with the same `metadata.name`
and `schema` from ``documents``.
"""Excludes all documents that have been deleted including all documents
earlier in the revision history with the same ``metadata.name`` and
``schema`` from ``documents``.
"""
for doc in copy.copy(documents):
if doc['deleted']:
docs_to_delete = [
d for d in documents if
(d['schema'], d['name']) == (doc['schema'], doc['name'])
and d['created_at'] <= doc['deleted_at']
]
for d in list(docs_to_delete):
documents.remove(d)
return documents
_documents_map = {} # (schema, metadata.name) => should be included?
for doc in sorted(documents, key=lambda x: x['created_at']):
if doc['deleted'] is True:
previous_doc = _documents_map.get((doc['schema'], doc['name']))
if previous_doc:
if doc['deleted_at'] >= previous_doc['created_at']:
_documents_map[(doc['schema'], doc['name'])] = None
else:
_documents_map[(doc['schema'], doc['name'])] = doc
return [d for d in _documents_map.values() if d is not None]
def _filter_revision_documents(documents, unique_only, **filters):
@ -739,8 +741,8 @@ def revision_diff(revision_id, comparison_revision_id):
# Remove each deleted document and its older counterparts because those
# documents technically don't exist.
for documents in (docs, comparison_docs):
documents = _exclude_deleted_documents(documents)
docs = _exclude_deleted_documents(docs)
comparison_docs = _exclude_deleted_documents(comparison_docs)
revision = revision_get(revision_id) if revision_id != 0 else None
comparison_revision = (revision_get(comparison_revision_id)