Revamp document hashing
This PS revamps document hashing. Instead of relying on Python's built-in
hash function to hash the contents of a document (i.e. its metadata and
data values), sha256 from hashlib is used instead, mostly for security
purposes (the built-in hash is also salted per interpreter process, so
its output is unsuitable for persisting to a database).

Further, two new columns have been added to the Document DB model,
data_hash and metadata_hash, and the old hash column has been dropped.
The data type for storing the hashes has been changed from BigInteger to
String, since sha256 yields a hex digest rather than an integer.

Finally, testing documentation was added.

Change-Id: I428ddcbce1007ea990ca0df1aa630072a050c722
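A quick sketch of the motivation (not part of the change itself; it uses the
stdlib json module in place of oslo_serialization's jsonutils): Python's
built-in hash() is salted per interpreter process, so its output cannot be
meaningfully persisted, whereas the sha256 digest of canonically serialized
JSON is stable across processes and machines::

    import hashlib
    import json

    def _make_hash(data):
        # sort_keys=True canonicalizes key order, so logically equal
        # dicts serialize to the same JSON string before hashing.
        return hashlib.sha256(
            json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()

    # Same digest regardless of insertion order, in every process:
    assert _make_hash({'a': 1, 'b': 2}) == _make_hash({'b': 2, 'a': 1})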
parent 81b3e42013
commit 8bf4f7407d
@@ -18,6 +18,7 @@
 import ast
 import copy
 import functools
+import hashlib
 import threading
 
 from oslo_config import cfg
@@ -25,6 +26,7 @@ from oslo_db import exception as db_exception
 from oslo_db import options
 from oslo_db.sqlalchemy import session
 from oslo_log import log as logging
+from oslo_serialization import jsonutils as json
 import six
 import sqlalchemy.orm as sa_orm
 
@@ -136,7 +138,8 @@ def documents_create(bucket_name, documents, session=None):
             doc['name'] = d[1]
             doc['data'] = {}
             doc['_metadata'] = {}
-            doc['hash'] = utils.make_hash({})
+            doc['data_hash'] = _make_hash({})
+            doc['metadata_hash'] = _make_hash({})
             doc['bucket_id'] = bucket['id']
             doc['revision_id'] = revision['id']
 
@@ -178,19 +181,12 @@ def _documents_create(bucket_name, values_list, session=None):
     for values in values_list:
         values['_metadata'] = values.pop('metadata')
         values['name'] = values['_metadata']['name']
 
-        # Hash the combination of the document's metadata and data to later
-        # efficiently check whether those data have changed.
-        dict_to_hash = values['_metadata'].copy()
-        dict_to_hash.update(values['data'])
-        values['hash'] = utils.make_hash(dict_to_hash)
-
         values['is_secret'] = 'secret' in values['data']
-        # Hash the combination of the document's metadata and data to later
-        # efficiently check whether those data have changed.
-        dict_to_hash = values['_metadata'].copy()
-        dict_to_hash.update(values['data'])
-        values['hash'] = utils.make_hash(dict_to_hash)
+        # Hash the document's metadata and data to later efficiently check
+        # whether those data have changed.
+        values['data_hash'] = _make_hash(values['data'])
+        values['metadata_hash'] = _make_hash(values['_metadata'])
 
         try:
             existing_document = document_get(
@@ -211,7 +207,8 @@ def _documents_create(bucket_name, values_list, session=None):
                 name=existing_document['name'],
                 bucket=existing_document['bucket_name'])
 
-        if existing_document['hash'] == values['hash']:
+        if (existing_document['data_hash'] == values['data_hash'] and
+                existing_document['metadata_hash'] == values['metadata_hash']):
             # Since the document has not changed, reference the original
             # revision in which it was created. This is necessary so that
             # the correct revision history is maintained.
@@ -231,6 +228,11 @@ def _documents_create(bucket_name, values_list, session=None):
     return changed_documents
 
 
+def _make_hash(data):
+    return hashlib.sha256(
+        json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()
+
+
 def document_get(session=None, raw_dict=False, **filters):
     """Retrieve a document from the DB.
 
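Splitting the old combined hash into data_hash and metadata_hash means a
consumer can tell which half of a document changed. A minimal illustration
using the _make_hash helper above (the sample documents are made up for the
example)::

    old = {'metadata': {'name': 'doc-1'}, 'data': {'foo': 'bar'}}
    new = {'metadata': {'name': 'doc-1', 'labels': {'x': 'y'}},
           'data': {'foo': 'bar'}}

    data_changed = _make_hash(old['data']) != _make_hash(new['data'])
    meta_changed = _make_hash(old['metadata']) != _make_hash(new['metadata'])
    # Only the metadata half changed:
    assert (data_changed, meta_changed) == (False, True)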
@@ -482,6 +484,7 @@ def _filter_revision_documents(documents, unique_only, **filters):
         if unique_key not in filtered_documents:
             filtered_documents[unique_key] = document
 
+    # TODO(fmontei): Sort by user-specified parameter.
     return sorted(filtered_documents.values(), key=lambda d: d['created_at'])
 
 
@@ -586,8 +589,8 @@ def revision_diff(revision_id, comparison_revision_id):
 
     def _compare_buckets(b1, b2):
         # Checks whether buckets' documents are identical.
-        return (sorted([d['hash'] for d in b1]) ==
-                sorted([d['hash'] for d in b2]))
+        return (sorted([(d['data_hash'], d['metadata_hash']) for d in b1]) ==
+                sorted([(d['data_hash'], d['metadata_hash']) for d in b2]))
 
     # If the list of documents for each bucket is identical, then the result
     # is "unmodified", else "modified".
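The bucket comparison now keys on (data_hash, metadata_hash) pairs rather
than the single dropped hash column. A hedged sketch with made-up digests::

    def _compare_buckets(b1, b2):
        # Buckets match when their documents' hash pairs are equal,
        # irrespective of document ordering.
        return (sorted([(d['data_hash'], d['metadata_hash']) for d in b1]) ==
                sorted([(d['data_hash'], d['metadata_hash']) for d in b2]))

    b1 = [{'data_hash': 'aaa', 'metadata_hash': 'bbb'},
          {'data_hash': 'ccc', 'metadata_hash': 'ddd'}]
    assert _compare_buckets(b1, list(reversed(b1)))  # "unmodified"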
@@ -753,7 +756,9 @@ def revision_rollback(revision_id, session=None):
     latest_revision = session.query(models.Revision)\
         .order_by(models.Revision.created_at.desc())\
         .first()
-    latest_revision_hashes = [d['hash'] for d in latest_revision['documents']]
+    latest_revision_hashes = [
+        (d['data_hash'], d['metadata_hash'])
+        for d in latest_revision['documents']]
 
     # If the rollback revision is the same as the latest revision, then there's
     # no point in rolling back.
@@ -767,12 +772,13 @@ def revision_rollback(revision_id, session=None):
     # it has changed, else False.
     doc_diff = {}
     for orig_doc in orig_revision['documents']:
-        if orig_doc['hash'] not in latest_revision_hashes:
+        if ((orig_doc['data_hash'], orig_doc['metadata_hash'])
+                not in latest_revision_hashes):
             doc_diff[orig_doc['id']] = True
         else:
             doc_diff[orig_doc['id']] = False
 
-    # If no changges have been made between the target revision to rollback to
+    # If no changes have been made between the target revision to rollback to
     # and the latest revision, raise an exception.
     if set(doc_diff.values()) == set([False]):
         raise errors.InvalidRollback(revision_id=revision_id)
@@ -789,8 +795,8 @@ def revision_rollback(revision_id, session=None):
 
         new_document = models.Document()
         new_document.update({x: orig_document[x] for x in (
-            'name', '_metadata', 'data', 'hash', 'schema', 'bucket_id')})
-
+            'name', '_metadata', 'data', 'data_hash', 'metadata_hash',
+            'schema', 'bucket_id')})
         new_document['revision_id'] = new_revision['id']
 
         # If the document has changed, then use the revision_id of the new
@@ -15,7 +15,6 @@
 from oslo_db.sqlalchemy import models
 from oslo_db.sqlalchemy import types as oslo_types
 from oslo_utils import timeutils
-from sqlalchemy import BigInteger
 from sqlalchemy import Boolean
 from sqlalchemy import Column
 from sqlalchemy import DateTime
@@ -141,7 +140,8 @@ class Document(BASE, DeckhandBase):
     # "metadata" is reserved, so use "_metadata" instead.
     _metadata = Column(oslo_types.JsonEncodedDict(), nullable=False)
     data = Column(oslo_types.JsonEncodedDict(), nullable=True)
-    hash = Column(BigInteger, nullable=False)
+    data_hash = Column(String, nullable=False)
+    metadata_hash = Column(String, nullable=False)
     is_secret = Column(Boolean, nullable=False, default=False)
     bucket_id = Column(Integer, ForeignKey('buckets.id', ondelete='CASCADE'),
                        nullable=False)
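The hash columns move from BigInteger to String because sha256's hexdigest
is a 64-character hex string, not an integer. A small sanity check, stdlib
only::

    import hashlib

    digest = hashlib.sha256(b'{}').hexdigest()
    assert isinstance(digest, str) and len(digest) == 64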
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+
 import fixtures
 import mock
 from oslo_config import cfg
@@ -76,6 +78,8 @@ class DeckhandWithDBTestCase(DeckhandTestCase):
 
     def setUp(self):
         super(DeckhandWithDBTestCase, self).setUp()
-        self.override_config('connection', "sqlite://", group='database')
+        self.override_config(
+            'connection', os.environ.get('PIFPAF_URL', 'sqlite://'),
+            group='database')
         db_api.setup_db()
         self.addCleanup(db_api.drop_db)
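The test base now resolves its database connection from the environment,
falling back to in-memory sqlite when pifpaf has not exported a postgresql
URL. A standalone sketch of the same lookup::

    import os

    # PIFPAF_URL is exported by `pifpaf run postgresql -- ...` (see the
    # tox.ini change below); without it, tests run against sqlite.
    connection = os.environ.get('PIFPAF_URL', 'sqlite://')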
@@ -20,8 +20,8 @@ from deckhand.tests.unit import base
 
 BASE_EXPECTED_FIELDS = ("created_at", "updated_at", "deleted_at", "deleted")
 DOCUMENT_EXPECTED_FIELDS = BASE_EXPECTED_FIELDS + (
-    "id", "schema", "name", "metadata", "data", "hash", "revision_id",
-    "bucket_id")
+    "id", "schema", "name", "metadata", "data", "data_hash", "metadata_hash",
+    "revision_id", "bucket_id")
 REVISION_EXPECTED_FIELDS = ("id", "documents", "tags")
 
 
@@ -44,7 +44,7 @@ class TestDocumentsNegative(base.TestDbBase):
     def test_delete_document_invalid_id(self):
         self.assertRaises(errors.DocumentNotFound,
                           self.show_document,
-                          id=test_utils.rand_uuid_hex())
+                          id=-1)
 
     def test_create_bucket_conflict(self):
         # Create the document in one bucket.
@@ -14,7 +14,6 @@
 
 from deckhand.db.sqlalchemy import api as db_api
 from deckhand import errors
-from deckhand.tests import test_utils
 from deckhand.tests.unit.db import base
 
 
@@ -22,25 +21,20 @@ class TestRevisionTagsNegative(base.TestDbBase):
 
     def test_create_tag_revision_not_found(self):
         self.assertRaises(
-            errors.RevisionNotFound, db_api.revision_tag_create,
-            test_utils.rand_uuid_hex())
+            errors.RevisionNotFound, db_api.revision_tag_create, -1)
 
     def test_show_tag_revision_not_found(self):
         self.assertRaises(
-            errors.RevisionNotFound, db_api.revision_tag_get,
-            test_utils.rand_uuid_hex())
+            errors.RevisionNotFound, db_api.revision_tag_get, -1)
 
     def test_delete_tag_revision_not_found(self):
         self.assertRaises(
-            errors.RevisionNotFound, db_api.revision_tag_delete,
-            test_utils.rand_uuid_hex())
+            errors.RevisionNotFound, db_api.revision_tag_delete, -1)
 
     def test_list_tags_revision_not_found(self):
         self.assertRaises(
-            errors.RevisionNotFound, db_api.revision_tag_get_all,
-            test_utils.rand_uuid_hex())
+            errors.RevisionNotFound, db_api.revision_tag_get_all, -1)
 
     def test_delete_all_tags_revision_not_found(self):
         self.assertRaises(
-            errors.RevisionNotFound, db_api.revision_tag_delete_all,
-            test_utils.rand_uuid_hex())
+            errors.RevisionNotFound, db_api.revision_tag_delete_all, -1)
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import copy
 import re
 import string
 
@@ -61,21 +60,3 @@ def multi_getattr(multi_key, dict_data):
         data = data.get(attr)
 
     return data
-
-
-def make_hash(o):
-    """Makes a hash from a dictionary, list, tuple or set to any level, that
-    contains only other hashable types (including any lists, tuples, sets, and
-    dictionaries).
-    """
-    if isinstance(o, (set, tuple, list)):
-        return tuple([make_hash(e) for e in o])
-
-    elif not isinstance(o, dict):
-        return hash(o)
-
-    new_o = copy.deepcopy(o)
-    for k, v in new_o.items():
-        new_o[k] = make_hash(v)
-
-    return hash(tuple(frozenset(sorted(new_o.items()))))
@@ -674,7 +674,7 @@ Supported query string parameters:
   `metadata.label=key=value`. Repeating this parameter indicates all
   requested labels must apply (AND not OR).
 * `sort` - string, optional, repeatable - Defines the sort order for returning
-  results. Default is `metadata.name`. Repeating this parameter indicates use
+  results. Default is by creation date. Repeating this parameter indicates use
   of multi-column sort with the most significant sorting column applied first.
 * `status.bucket` - string, optional, repeatable - Used to select documents
   only from a particular bucket. Repeating this parameter indicates documents
@@ -36,6 +36,7 @@ consumption by other UCP services.
    :maxdepth: 2
 
    HACKING
+   testing
 
 .. toctree::
    :maxdepth: 1
@@ -0,0 +1,97 @@
+..
+    Copyright 2017 AT&T Intellectual Property. All other rights reserved.
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+=======
+Testing
+=======
+
+Unit testing
+============
+
+Unit testing currently uses an in-memory sqlite database. Since Deckhand's
+primary function is to serve as the back-end storage for UCP, the majority
+of unit tests perform actual database operations. Mocking is used sparingly
+because Deckhand is a fairly insular application that lives at the bottom
+of a very deep stack; Deckhand only communicates with Keystone and Barbican.
+As such, validating database operations is paramount to correctly testing
+Deckhand.
+
+To run unit tests using sqlite, execute::
+
+    $ tox -epy27
+    $ tox -epy35
+
+against a py27- or py35-backed environment, respectively. To run individual
+unit tests, run::
+
+    $ tox -e py27 -- deckhand.tests.unit.db.test_revisions
+
+for example.
+
+To run unit tests using postgresql, execute::
+
+    $ tox -epy27-postgresql
+    $ tox -epy35-postgresql
+
+against a py27- or py35-backed environment, respectively. Individual unit
+tests can be executed the same way as above.
+
+`pifpaf <https://github.com/jd/pifpaf>`_ is used to spin up a temporary
+postgresql database. The URL is set up as an environment variable via
+``PIFPAF_URL``.
+
+.. warning::
+
+    It is **not** recommended to run postgresql-backed unit tests
+    concurrently. Only run them serially. This is because, to guarantee
+    true test isolation, the DB tables are re-created each test run. Only
+    one instance of postgresql is created across all threads, thus causing
+    major conflicts if concurrency > 1.
+
+Functional testing
+==================
+
+Prerequisites
+-------------
+Deckhand requires Docker to run its functional tests. A basic installation
+guide for Docker for Ubuntu can be found
+`here <https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/>`_.
+
+Overview
+--------
+Deckhand uses `gabbi <https://github.com/cdent/gabbi>`_ as its functional
+testing framework. Functional tests can be executed via::
+
+    $ tox -e functional
+
+You can also run a subset of tests via a regex::
+
+    $ tox -e functional -- gabbi.suitemaker.test_gabbi_document-crud-success-multi-bucket
+
+The command executes ``tools/functional-tests.sh`` which:
+
+1) Launches Postgresql inside a Docker container.
+2) Sets up a basic Deckhand configuration file that uses Postgresql
+   in its ``oslo_db`` connection string.
+3) Sets up a custom policy file with very liberal permissions so that
+   gabbi can talk to Deckhand without having to authenticate against
+   Keystone and pass an admin token to Deckhand.
+4) Instantiates Deckhand via ``uwsgi``.
+5) Calls gabbi which runs a battery of functional tests.
+
+At this time, there are no functional tests for policy enforcement
+verification. Negative tests will be added at a later date to confirm that
+a 403 Forbidden is raised for each endpoint that does policy enforcement
+absent necessary permissions.
@@ -17,3 +17,4 @@ bandit>=1.1.0 # Apache-2.0
 sphinx>=1.6.2 # BSD
 gabbi==1.35.1
 sphinx_rtd_theme==0.2.4
+pifpaf==0.10.0
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+
+set -o pipefail
+
+TESTRARGS=$1
+
+# --until-failure is not compatible with --subunit see:
+#
+# https://bugs.launchpad.net/testrepository/+bug/1411804
+#
+# this work around exists until that is addressed
+if [[ "$TESTRARGS" =~ "until-failure" ]]; then
+    python setup.py testr --slowest --testr-args="$TESTRARGS"
+else
+    python setup.py testr --slowest --testr-args="--subunit $TESTRARGS" | subunit-trace -f
+fi
tox.ini
@@ -23,11 +23,21 @@ commands =
     {[testenv]commands}
     ostestr '{posargs}'
 
+[testenv:py27-postgresql]
+commands =
+    {[testenv]commands}
+    pifpaf run postgresql -- '{toxinidir}'/tools/pretty_tox.sh '--concurrency=1 {posargs}'
+
 [testenv:py35]
 commands =
     {[testenv]commands}
     ostestr '{posargs}'
 
+[testenv:py35-postgresql]
+commands =
+    {[testenv]commands}
+    pifpaf run postgresql -- '{toxinidir}'/tools/pretty_tox.sh '--concurrency=1 {posargs}'
+
 [testenv:functional]
 usedevelop = True
 setenv = VIRTUAL_ENV={envdir}