From 18704ff74d55fa16da675041526c3a48a83d1698 Mon Sep 17 00:00:00 2001 From: Felipe Monteiro Date: Sat, 20 Jan 2018 22:45:03 -0500 Subject: [PATCH] Optimize runtime for excluding deleted documents Optimize runtime for excluding deleted documents in Deckhand's database layer. Currently the runtime is O(N^2): for each document, check whether it is deleted, then iterate over every other document and exclude it if its creation date is no later than the current document's deletion date and its schema and metadata.name are the same (in other words, exclude every document from an earlier revision that was deleted in a more recent one if it is the same document). The runtime was changed to O(NlogN): the documents are sorted by creation date once and tracked in a dictionary keyed by (schema, metadata.name). Change-Id: I4aa4e1429014731751288861735c705e6b6c6ed4 --- deckhand/db/sqlalchemy/api.py | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/deckhand/db/sqlalchemy/api.py b/deckhand/db/sqlalchemy/api.py index 0d51d464..3110fb28 100644 --- a/deckhand/db/sqlalchemy/api.py +++ b/deckhand/db/sqlalchemy/api.py @@ -577,20 +577,22 @@ def revision_delete_all(): def _exclude_deleted_documents(documents): - """Excludes all documents with ``deleted=True`` field including all - documents earlier in the revision history with the same `metadata.name` - and `schema` from ``documents``. + """Excludes all documents that have been deleted including all documents + earlier in the revision history with the same ``metadata.name`` and + ``schema`` from ``documents``. """ - for doc in copy.copy(documents): - if doc['deleted']: - docs_to_delete = [ - d for d in documents if - (d['schema'], d['name']) == (doc['schema'], doc['name']) - and d['created_at'] <= doc['deleted_at'] - ] - for d in list(docs_to_delete): - documents.remove(d) - return documents + _documents_map = {} # (schema, metadata.name) => should be included? 
+ + for doc in sorted(documents, key=lambda x: x['created_at']): + if doc['deleted'] is True: + previous_doc = _documents_map.get((doc['schema'], doc['name'])) + if previous_doc: + if doc['deleted_at'] >= previous_doc['created_at']: + _documents_map[(doc['schema'], doc['name'])] = None + else: + _documents_map[(doc['schema'], doc['name'])] = doc + + return [d for d in _documents_map.values() if d is not None] def _filter_revision_documents(documents, unique_only, **filters): @@ -739,8 +741,8 @@ def revision_diff(revision_id, comparison_revision_id): # Remove each deleted document and its older counterparts because those # documents technically don't exist. - for documents in (docs, comparison_docs): - documents = _exclude_deleted_documents(documents) + docs = _exclude_deleted_documents(docs) + comparison_docs = _exclude_deleted_documents(comparison_docs) revision = revision_get(revision_id) if revision_id != 0 else None comparison_revision = (revision_get(comparison_revision_id)