class DuplicateDocumentValidator(BaseValidator):
    """Validator that rejects a document already seen by this instance.

    Documents are tracked by their ``meta`` attribute: the first document
    carrying a given ``meta`` passes, and every later one is reported.
    """

    def __init__(self):
        super(DuplicateDocumentValidator, self).__init__()
        # ``meta`` values of every document accepted so far.
        self._document_history = set()
        self._diagnostic = (
            'Ensure that each raw document has a unique combination of '
            '(name, schema, metadata.layeringDefinition.layer).')

    def validate(self, document, **kwargs):
        """Report ``document`` if its ``meta`` was already validated.

        :param document: Document exposing ``meta``, ``schema``, ``name``
            and ``layer`` attributes.
        :param kwargs: Accepted but not used by this validator.
        :returns: An empty list for the first occurrence of a ``meta``
            value; otherwise a one-element list holding the formatted
            validation message.
        """
        meta = document.meta
        if meta not in self._document_history:
            # First sighting: remember it so later copies are flagged.
            self._document_history.add(meta)
            return []

        duplicate_error = vm.ValidationMessage(
            message="Duplicate document exists",
            doc_schema=document.schema,
            doc_name=document.name,
            doc_layer=document.layer,
            diagnostic=self._diagnostic)
        return [duplicate_error.format_message()]
def test_validation_document_duplication(self):
    """Check duplicate documents are reported only when pre-validating."""
    sample = self._read_data('sample_document')

    # Duplicates should only be reported when pre_validate is True, as
    # the `db` module already handles this on behalf of the controller.
    validator = document_validation.DocumentValidation(
        [sample] * 2,  # The same document supplied twice.
        pre_validate=True)
    results = validator.validate_all()

    metadata = sample['metadata']
    expected_error = {
        'diagnostic': mock.ANY,
        'documents': [{
            'layer': metadata['layeringDefinition']['layer'],
            'name': metadata['name'],
            'schema': sample['schema'],
        }],
        'error': True,
        'kind': 'ValidationMessage',
        'level': 'Error',
        'message': 'Duplicate document exists',
        'name': 'Deckhand validation error',
    }

    # The error is expected on the second validation entry.
    second_errors = results[1]['errors']
    self.assertEqual(1, len(second_errors))
    self.assertEqual(expected_error, second_errors[0])

    # With pre_validate=False the duplicate check is not registered.
    validator = document_validation.DocumentValidation(
        [sample] * 2,  # The same document supplied twice.
        pre_validate=False)
    results = validator.validate_all()
    self.assertEmpty(results[1]['errors'])