Optimize runtime for excluding deleted documents
Optimize runtime for excluding deleted documents in Deckhand's database layer. Previously the runtime was O(N^2): for each deleted document, every other document was scanned and removed if it had the same schema and metadata.name and its creation date was no later than the deleted document's deletion date (in other words, every document from an earlier revision was removed if the same document was deleted in a more recent revision). The runtime is now O(N log N): the documents are sorted once by creation date and tracked in a dictionary keyed by (schema, metadata.name), so each document is visited only once. Change-Id: I4aa4e1429014731751288861735c705e6b6c6ed4
This commit is contained in:
parent
c418e5f5ad
commit
18704ff74d
|
@ -577,20 +577,22 @@ def revision_delete_all():
|
|||
|
||||
|
||||
def _exclude_deleted_documents(documents):
|
||||
"""Excludes all documents with ``deleted=True`` field including all
|
||||
documents earlier in the revision history with the same `metadata.name`
|
||||
and `schema` from ``documents``.
|
||||
"""Excludes all documents that have been deleted including all documents
|
||||
earlier in the revision history with the same ``metadata.name`` and
|
||||
``schema`` from ``documents``.
|
||||
"""
|
||||
for doc in copy.copy(documents):
|
||||
if doc['deleted']:
|
||||
docs_to_delete = [
|
||||
d for d in documents if
|
||||
(d['schema'], d['name']) == (doc['schema'], doc['name'])
|
||||
and d['created_at'] <= doc['deleted_at']
|
||||
]
|
||||
for d in list(docs_to_delete):
|
||||
documents.remove(d)
|
||||
return documents
|
||||
_documents_map = {} # (schema, metadata.name) => should be included?
|
||||
|
||||
for doc in sorted(documents, key=lambda x: x['created_at']):
|
||||
if doc['deleted'] is True:
|
||||
previous_doc = _documents_map.get((doc['schema'], doc['name']))
|
||||
if previous_doc:
|
||||
if doc['deleted_at'] >= previous_doc['created_at']:
|
||||
_documents_map[(doc['schema'], doc['name'])] = None
|
||||
else:
|
||||
_documents_map[(doc['schema'], doc['name'])] = doc
|
||||
|
||||
return [d for d in _documents_map.values() if d is not None]
|
||||
|
||||
|
||||
def _filter_revision_documents(documents, unique_only, **filters):
|
||||
|
@ -739,8 +741,8 @@ def revision_diff(revision_id, comparison_revision_id):
|
|||
|
||||
# Remove each deleted document and its older counterparts because those
|
||||
# documents technically don't exist.
|
||||
for documents in (docs, comparison_docs):
|
||||
documents = _exclude_deleted_documents(documents)
|
||||
docs = _exclude_deleted_documents(docs)
|
||||
comparison_docs = _exclude_deleted_documents(comparison_docs)
|
||||
|
||||
revision = revision_get(revision_id) if revision_id != 0 else None
|
||||
comparison_revision = (revision_get(comparison_revision_id)
|
||||
|
|
Loading…
Reference in New Issue