Revamp document hashing

This PS revamps document hashing. Instead of relying on Python's
built-in hash function to hash the contents of a document (i.e. its
metadata and data values), sha256 from hashlib is now used, primarily
for security reasons: the built-in hash is neither stable across
processes nor collision-resistant.
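
For illustration, a minimal sketch of the new approach, mirroring the
_make_hash helper added in this PS (stdlib json stands in here for the
oslo_serialization jsonutils used in the actual change):

    import hashlib
    import json

    # Python's built-in hash() is salted per process (PYTHONHASHSEED),
    # so equal documents can hash differently across API workers, and
    # it is not collision-resistant. sha256 over canonically serialized
    # JSON is deterministic and cryptographically strong.
    def make_hash(data):
        # sort_keys=True fixes key order so logically equal dicts
        # always produce the same hex digest.
        return hashlib.sha256(
            json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()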

Further, two new columns have been added to the document DB model,
data_hash and metadata_hash, and the old hash column has been dropped.
The column type for storing hashes has been changed from BigInteger to
String, since a sha256 hex digest is fixed-width text.
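
With separate digests, a document is unchanged only when both match; a
minimal sketch of the comparison performed during document creation
(illustrative only; the real check lives in _documents_create):

    def is_unchanged(existing_document, values):
        # A change to either data or metadata produces a new document
        # row; an unchanged document keeps its original revision_id.
        return (existing_document['data_hash'] == values['data_hash'] and
                existing_document['metadata_hash'] ==
                values['metadata_hash'])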

Finally, testing documentation was added.

Change-Id: I428ddcbce1007ea990ca0df1aa630072a050c722
Felipe Monteiro 2017-09-26 01:08:05 +01:00
parent 81b3e42013
commit 8bf4f7407d
13 changed files with 168 additions and 58 deletions

View File

@ -18,6 +18,7 @@
import ast
import copy
import functools
import hashlib
import threading
from oslo_config import cfg
@ -25,6 +26,7 @@ from oslo_db import exception as db_exception
from oslo_db import options
from oslo_db.sqlalchemy import session
from oslo_log import log as logging
from oslo_serialization import jsonutils as json
import six
import sqlalchemy.orm as sa_orm
@ -136,7 +138,8 @@ def documents_create(bucket_name, documents, session=None):
doc['name'] = d[1]
doc['data'] = {}
doc['_metadata'] = {}
doc['hash'] = utils.make_hash({})
doc['data_hash'] = _make_hash({})
doc['metadata_hash'] = _make_hash({})
doc['bucket_id'] = bucket['id']
doc['revision_id'] = revision['id']
@ -178,19 +181,12 @@ def _documents_create(bucket_name, values_list, session=None):
for values in values_list:
values['_metadata'] = values.pop('metadata')
values['name'] = values['_metadata']['name']
# Hash the combination of the document's metadata and data to later
# efficiently check whether those data have changed.
dict_to_hash = values['_metadata'].copy()
dict_to_hash.update(values['data'])
values['hash'] = utils.make_hash(dict_to_hash)
values['is_secret'] = 'secret' in values['data']
# Hash the combination of the document's metadata and data to later
# efficiently check whether those data have changed.
dict_to_hash = values['_metadata'].copy()
dict_to_hash.update(values['data'])
values['hash'] = utils.make_hash(dict_to_hash)
# Hash the document's metadata and data to later efficiently check
# whether those data have changed.
values['data_hash'] = _make_hash(values['data'])
values['metadata_hash'] = _make_hash(values['_metadata'])
try:
existing_document = document_get(
@ -211,7 +207,8 @@ def _documents_create(bucket_name, values_list, session=None):
name=existing_document['name'],
bucket=existing_document['bucket_name'])
if existing_document['hash'] == values['hash']:
if (existing_document['data_hash'] == values['data_hash'] and
existing_document['metadata_hash'] == values['metadata_hash']):
# Since the document has not changed, reference the original
# revision in which it was created. This is necessary so that
# the correct revision history is maintained.
@ -231,6 +228,11 @@ def _documents_create(bucket_name, values_list, session=None):
return changed_documents
def _make_hash(data):
return hashlib.sha256(
json.dumps(data, sort_keys=True).encode('utf-8')).hexdigest()
def document_get(session=None, raw_dict=False, **filters):
"""Retrieve a document from the DB.
@ -482,6 +484,7 @@ def _filter_revision_documents(documents, unique_only, **filters):
if unique_key not in filtered_documents:
filtered_documents[unique_key] = document
# TODO(fmontei): Sort by user-specified parameter.
return sorted(filtered_documents.values(), key=lambda d: d['created_at'])
@ -586,8 +589,8 @@ def revision_diff(revision_id, comparison_revision_id):
def _compare_buckets(b1, b2):
# Checks whether buckets' documents are identical.
return (sorted([d['hash'] for d in b1]) ==
sorted([d['hash'] for d in b2]))
return (sorted([(d['data_hash'], d['metadata_hash']) for d in b1]) ==
sorted([(d['data_hash'], d['metadata_hash']) for d in b2]))
# If the list of documents for each bucket is identical, then the result
# is "unmodified", else "modified".
@ -753,7 +756,9 @@ def revision_rollback(revision_id, session=None):
latest_revision = session.query(models.Revision)\
.order_by(models.Revision.created_at.desc())\
.first()
latest_revision_hashes = [d['hash'] for d in latest_revision['documents']]
latest_revision_hashes = [
(d['data_hash'], d['metadata_hash'])
for d in latest_revision['documents']]
# If the rollback revision is the same as the latest revision, then there's
# no point in rolling back.
@ -767,12 +772,13 @@ def revision_rollback(revision_id, session=None):
# it has changed, else False.
doc_diff = {}
for orig_doc in orig_revision['documents']:
if orig_doc['hash'] not in latest_revision_hashes:
if ((orig_doc['data_hash'], orig_doc['metadata_hash'])
not in latest_revision_hashes):
doc_diff[orig_doc['id']] = True
else:
doc_diff[orig_doc['id']] = False
# If no changges have been made between the target revision to rollback to
# If no changes have been made between the target revision to rollback to
# and the latest revision, raise an exception.
if set(doc_diff.values()) == set([False]):
raise errors.InvalidRollback(revision_id=revision_id)
@ -789,8 +795,8 @@ def revision_rollback(revision_id, session=None):
new_document = models.Document()
new_document.update({x: orig_document[x] for x in (
'name', '_metadata', 'data', 'hash', 'schema', 'bucket_id')})
'name', '_metadata', 'data', 'data_hash', 'metadata_hash',
'schema', 'bucket_id')})
new_document['revision_id'] = new_revision['id']
# If the document has changed, then use the revision_id of the new

View File

@ -15,7 +15,6 @@
from oslo_db.sqlalchemy import models
from oslo_db.sqlalchemy import types as oslo_types
from oslo_utils import timeutils
from sqlalchemy import BigInteger
from sqlalchemy import Boolean
from sqlalchemy import Column
from sqlalchemy import DateTime
@ -141,7 +140,8 @@ class Document(BASE, DeckhandBase):
# "metadata" is reserved, so use "_metadata" instead.
_metadata = Column(oslo_types.JsonEncodedDict(), nullable=False)
data = Column(oslo_types.JsonEncodedDict(), nullable=True)
hash = Column(BigInteger, nullable=False)
data_hash = Column(String, nullable=False)
metadata_hash = Column(String, nullable=False)
is_secret = Column(Boolean, nullable=False, default=False)
bucket_id = Column(Integer, ForeignKey('buckets.id', ondelete='CASCADE'),
nullable=False)

View File

@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import fixtures
import mock
from oslo_config import cfg
@ -76,6 +78,8 @@ class DeckhandWithDBTestCase(DeckhandTestCase):
def setUp(self):
super(DeckhandWithDBTestCase, self).setUp()
self.override_config('connection', "sqlite://", group='database')
self.override_config(
'connection', os.environ.get('PIFPAF_URL', 'sqlite://'),
group='database')
db_api.setup_db()
self.addCleanup(db_api.drop_db)

View File

@ -20,8 +20,8 @@ from deckhand.tests.unit import base
BASE_EXPECTED_FIELDS = ("created_at", "updated_at", "deleted_at", "deleted")
DOCUMENT_EXPECTED_FIELDS = BASE_EXPECTED_FIELDS + (
"id", "schema", "name", "metadata", "data", "hash", "revision_id",
"bucket_id")
"id", "schema", "name", "metadata", "data", "data_hash", "metadata_hash",
"revision_id", "bucket_id")
REVISION_EXPECTED_FIELDS = ("id", "documents", "tags")

View File

@ -44,7 +44,7 @@ class TestDocumentsNegative(base.TestDbBase):
def test_delete_document_invalid_id(self):
self.assertRaises(errors.DocumentNotFound,
self.show_document,
id=test_utils.rand_uuid_hex())
id=-1)
def test_create_bucket_conflict(self):
# Create the document in one bucket.

View File

@ -14,7 +14,6 @@
from deckhand.db.sqlalchemy import api as db_api
from deckhand import errors
from deckhand.tests import test_utils
from deckhand.tests.unit.db import base
@ -22,25 +21,20 @@ class TestRevisionTagsNegative(base.TestDbBase):
def test_create_tag_revision_not_found(self):
self.assertRaises(
errors.RevisionNotFound, db_api.revision_tag_create,
test_utils.rand_uuid_hex())
errors.RevisionNotFound, db_api.revision_tag_create, -1)
def test_show_tag_revision_not_found(self):
self.assertRaises(
errors.RevisionNotFound, db_api.revision_tag_get,
test_utils.rand_uuid_hex())
errors.RevisionNotFound, db_api.revision_tag_get, -1)
def test_delete_tag_revision_not_found(self):
self.assertRaises(
errors.RevisionNotFound, db_api.revision_tag_delete,
test_utils.rand_uuid_hex())
errors.RevisionNotFound, db_api.revision_tag_delete, -1)
def test_list_tags_revision_not_found(self):
self.assertRaises(
errors.RevisionNotFound, db_api.revision_tag_get_all,
test_utils.rand_uuid_hex())
errors.RevisionNotFound, db_api.revision_tag_get_all, -1)
def test_delete_all_tags_revision_not_found(self):
self.assertRaises(
errors.RevisionNotFound, db_api.revision_tag_delete_all,
test_utils.rand_uuid_hex())
errors.RevisionNotFound, db_api.revision_tag_delete_all, -1)

View File

@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import re
import string
@ -61,21 +60,3 @@ def multi_getattr(multi_key, dict_data):
data = data.get(attr)
return data
def make_hash(o):
"""Makes a hash from a dictionary, list, tuple or set to any level, that
contains only other hashable types (including any lists, tuples, sets, and
dictionaries).
"""
if isinstance(o, (set, tuple, list)):
return tuple([make_hash(e) for e in o])
elif not isinstance(o, dict):
return hash(o)
new_o = copy.deepcopy(o)
for k, v in new_o.items():
new_o[k] = make_hash(v)
return hash(tuple(frozenset(sorted(new_o.items()))))

View File

@ -674,7 +674,7 @@ Supported query string parameters:
`metadata.label=key=value`. Repeating this parameter indicates all
requested labels must apply (AND not OR).
* `sort` - string, optional, repeatable - Defines the sort order for returning
results. Default is `metadata.name`. Repeating this parameter indicates use
results. Default is by creation date. Repeating this parameter indicates use
of multi-column sort with the most significant sorting column applied first.
* `status.bucket` - string, optional, repeatable - Used to select documents
only from a particular bucket. Repeating this parameter indicates documents

View File

@ -36,6 +36,7 @@ consumption by other UCP services.
:maxdepth: 2
HACKING
testing
.. toctree::
:maxdepth: 1

doc/source/testing.rst Normal file
View File

@ -0,0 +1,97 @@
..
Copyright 2017 AT&T Intellectual Property. All other rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
=======
Testing
=======
Unit testing
============
Unit testing currently uses an in-memory sqlite database. Since Deckhand's
primary function is to serve as the back-end storage for UCP, the majority
of unit tests perform actual database operations. Mocking is used sparingly
because Deckhand is a fairly insular application that lives at the bottom
of a very deep stack and communicates only with Keystone and Barbican.
As such, validating database operations is paramount to correctly testing
Deckhand.
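As a hedged illustration of that pattern, the sketch below checks the
``_make_hash`` helper through the real test base class added in this
change; this particular test and its payload are invented, not part of
the PS::

    from deckhand.db.sqlalchemy import api as db_api
    from deckhand.tests.unit import base


    class TestDocumentHashing(base.DeckhandWithDBTestCase):

        def test_make_hash_is_deterministic(self):
            payload = {'metadata': {'name': 'doc-1'}, 'data': {'a': 1}}
            # sha256 over sorted-key JSON: equal dicts, equal digests.
            self.assertEqual(db_api._make_hash(payload),
                             db_api._make_hash(payload))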
To run unit tests using sqlite, execute::
$ tox -epy27
$ tox -epy35
against a py27- or py35-backed environment, respectively. To run individual
unit tests, run::
$ tox -e py27 -- deckhand.tests.unit.db.test_revisions
for example.
To run unit tests using postgresql, execute::
$ tox -epy27-postgresql
$ tox -epy35-postgresql
against a py27- or py35-backed environment, respectively. Individual unit tests
can be executed the same way as above.
`pifpaf <https://github.com/jd/pifpaf>`_ is used to spin up a temporary
postgresql database. The connection URL is exported to the tests via the
``PIFPAF_URL`` environment variable.
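The unit test base class reads this variable when configuring oslo.db,
as shown in the diff above; a minimal sketch of the lookup::

    import os

    # Prefer pifpaf's temporary postgresql; fall back to in-memory
    # sqlite when no pifpaf-managed database is running.
    db_url = os.environ.get('PIFPAF_URL', 'sqlite://')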
.. warning::
It is **not** recommended to run postgresql-backed unit tests concurrently;
run them serially instead. To guarantee true test isolation, the DB tables
are re-created for each test run, but only one postgresql instance is
shared across all threads, so a concurrency greater than 1 causes major
conflicts.
Functional testing
==================
Prerequisites
-------------
Deckhand requires Docker to run its functional tests. A basic installation
guide for Docker for Ubuntu can be found
`here <https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/>`_.
Overview
--------
Deckhand uses `gabbi <https://github.com/cdent/gabbi>`_ as its functional
testing framework. Functional tests can be executed via::
$ tox -e functional
You can also run a subset of tests via a regex::
$ tox -e functional -- gabbi.suitemaker.test_gabbi_document-crud-success-multi-bucket
The command executes ``tools/functional-tests.sh``, which:
1) Launches Postgresql inside a Docker container.
2) Sets up a basic Deckhand configuration file that uses Postgresql
in its ``oslo_db`` connection string.
3) Sets up a custom policy file with very liberal permissions so that
gabbi can talk to Deckhand without having to authenticate against
Keystone and pass an admin token to Deckhand.
4) Instantiates Deckhand via ``uwsgi``.
5) Calls gabbi, which runs a battery of functional tests (see the
sketch after this list).
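As a hedged sketch of step 5, gabbi suites are typically wired into the
Python test loader from YAML files; the directory name and port below
are assumptions, not values taken from this PS::

    import os

    from gabbi import driver

    TESTS_DIR = 'gabbits'  # hypothetical directory of YAML test files


    def load_tests(loader, tests, pattern):
        # Build one test case per YAML document, targeting the
        # uwsgi-hosted Deckhand instance the script starts.
        test_dir = os.path.join(os.path.dirname(__file__), TESTS_DIR)
        return driver.build_tests(test_dir, loader,
                                  host='localhost', port=9000)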
At this time, there are no functional tests for policy enforcement
verification. Negative tests will be added at a later date to confirm that
a 403 Forbidden is returned by each endpoint that enforces policy when the
required permissions are absent.

View File

@ -17,3 +17,4 @@ bandit>=1.1.0 # Apache-2.0
sphinx>=1.6.2 # BSD
gabbi==1.35.1
sphinx_rtd_theme==0.2.4
pifpaf==0.10.0

tools/pretty_tox.sh Executable file
View File

@ -0,0 +1,16 @@
#!/usr/bin/env bash
set -o pipefail
TESTRARGS=$1
# --until-failure is not compatible with --subunit see:
#
# https://bugs.launchpad.net/testrepository/+bug/1411804
#
# this work around exists until that is addressed
if [[ "$TESTARGS" =~ "until-failure" ]]; then
python setup.py testr --slowest --testr-args="$TESTRARGS"
else
python setup.py testr --slowest --testr-args="--subunit $TESTRARGS" | subunit-trace -f
fi

tox.ini
View File

@ -23,11 +23,21 @@ commands =
{[testenv]commands}
ostestr '{posargs}'
[testenv:py27-postgresql]
commands =
{[testenv]commands}
pifpaf run postgresql -- '{toxinidir}'/tools/pretty_tox.sh '--concurrency=1 {posargs}'
[testenv:py35]
commands =
{[testenv]commands}
ostestr '{posargs}'
[testenv:py35-postgresql]
commands =
{[testenv]commands}
pifpaf run postgresql -- '{toxinidir}'/tools/pretty_tox.sh '--concurrency=1 {posargs}'
[testenv:functional]
usedevelop = True
setenv = VIRTUAL_ENV={envdir}