diff --git a/deckhand/common/utils.py b/deckhand/common/utils.py index c0bdf3a3..8b6be04f 100644 --- a/deckhand/common/utils.py +++ b/deckhand/common/utils.py @@ -87,6 +87,7 @@ def jsonpath_parse(data, jsonpath, match_all=False): :param data: The `data` section of a document. :param jsonpath: A multi-part key that references a nested path in ``data``. + :param match_all: Whether to return all matches or just the first one. :returns: Entry that corresponds to ``data[jsonpath]`` if present, else None. @@ -107,7 +108,78 @@ def jsonpath_parse(data, jsonpath, match_all=False): return result if match_all else result[0] -def _execute_data_expansion(jsonpath, data): +def _execute_replace(data, value, jsonpath, pattern=None, recurse=None): + # These are O(1) reference copies to avoid accidentally modifying source + # data. We only want to update destination data. + data_copy = copy.copy(data) + value_copy = copy.copy(value) + + path = _jsonpath_parse(jsonpath) + path_to_change = path.find(data_copy) + recurse = recurse or {} + + def _try_replace_pattern(to_replace): + try: + # A pattern requires us to look up the data located at + # to_replace[jsonpath] and then figure out what + # re.match(to_replace[jsonpath], pattern) is (in pseudocode). + # Raise an exception in case the path isn't present in the + # to_replace and a pattern has been provided since it is + # otherwise impossible to do the look-up. + replacement = re.sub(pattern, + six.text_type(value_copy), + to_replace) + except TypeError as e: + LOG.error('Failed to substitute the value %s into %s ' + 'using pattern %s. Details: %s', + six.text_type(value_copy), to_replace, pattern, + six.text_type(e)) + raise errors.MissingDocumentPattern(jsonpath=jsonpath, + pattern=pattern) + return replacement + + def _replace_pattern_recursively(curr_data, depth, max_depth=-1): + # If max_depth is -1 (meaning no depth), then recursion will be + # performed over all of ``curr_data`` as depth starts out at 0. 
+ if depth == max_depth: + return + + if isinstance(curr_data, dict): + for k, v in curr_data.items(): + if isinstance(v, six.string_types) and pattern in v: + replacement = _try_replace_pattern(v) + curr_data[k] = replacement + else: + _replace_pattern_recursively(v, depth + 1, max_depth) + elif isinstance(curr_data, list): + for idx, v in enumerate(curr_data): + if isinstance(v, six.string_types) and pattern in v: + replacement = _try_replace_pattern(v) + curr_data[idx] = replacement + else: + _replace_pattern_recursively(v, depth + 1, max_depth) + + to_replace = path_to_change[0].value + if pattern: + if recurse: + max_depth = recurse.get('depth', -1) + # Recursion is only possible for lists/dicts. + if isinstance(to_replace, (dict, list)): + _replace_pattern_recursively(to_replace, 0, max_depth) + return data_copy + else: + # Edge case to handle a path that leads to a string value + # (not a list or dict). Even though no recursion is + # technically possible, gracefully handle this by + # performing non-recursive pattern replacement on the str. + return path.update(data_copy, _try_replace_pattern(to_replace)) + else: + return path.update(data_copy, _try_replace_pattern(to_replace)) + else: + return path.update(data_copy, value_copy) + + +def _execute_data_expansion(data, jsonpath): # Expand ``data`` with any path specified in ``jsonpath``. For example, # if jsonpath is ".foo[0].bar.baz" then for each subpath -- foo[0], bar, # and baz -- that key will be added to ``data`` if missing. @@ -137,25 +209,13 @@ def _execute_data_expansion(jsonpath, data): d = d.get(path) -def jsonpath_replace(data, value, jsonpath, pattern=None): +def jsonpath_replace(data, value, jsonpath, pattern=None, recurse=None): """Update value in ``data`` at the path specified by ``jsonpath``. - If the nested path corresponding to ``jsonpath`` isn't found in ``data``, the path is created as an empty ``{}`` for each sub-path along the ``jsonpath``. 
- :param data: The `data` section of a document. - :param value: The new value for ``data[jsonpath]``. - :param jsonpath: A multi-part key that references a nested path in - ``data``. Must begin with "." (without quotes). - :param pattern: A regular expression pattern. - :returns: Updated value at ``data[jsonpath]``. - :raises: MissingDocumentPattern if ``pattern`` is not None and - ``data[jsonpath]`` doesn't exist. - :raises ValueError: If ``jsonpath`` doesn't begin with "." - Example:: - doc = { 'data': { 'some_url': http://admin:INSERT_PASSWORD_HERE@svc-name:8080/v1 @@ -169,6 +229,24 @@ def jsonpath_replace(data, value, jsonpath, pattern=None): # The returned URL will look like: # http://admin:super-duper-secret@svc-name:8080/v1 doc['data'].update(replaced_data) + + :param data: The ``data`` section of a document. + :param value: The new value for ``data[jsonpath]``. + :param jsonpath: A multi-part key that references a nested path in + ``data``. Must begin with "." or "$" (without quotes). + :param pattern: A regular expression pattern. + :param recurse: Dictionary containing a single key called "depth" which + specifies the recursion depth. If provided, indicates that recursive + pattern substitution should be performed, beginning at ``jsonpath``. + Best practice is to limit the scope of the recursion as much as + possible: e.g. avoid passing in "$" as the ``jsonpath``, but rather use + a JSON path that lives closer to the nested strings in question. + Optimize performance by choosing an ideal ``depth`` value; -1 will + cause recursion depth to be unlimited. + :returns: The updated ``data``, with the value at ``jsonpath`` replaced. + :raises: MissingDocumentPattern if ``pattern`` is not None and + ``data[jsonpath]`` doesn't exist. + :raises ValueError: If ``jsonpath`` doesn't begin with "." or "$".
""" # These are O(1) reference copies to avoid accidentally modifying source @@ -177,45 +255,23 @@ def jsonpath_replace(data, value, jsonpath, pattern=None): value_copy = copy.copy(value) jsonpath = _normalize_jsonpath(jsonpath) + recurse = recurse or {} if not jsonpath == '$' and not jsonpath.startswith('$.'): LOG.error('The provided jsonpath %s does not begin with "." or "$"', jsonpath) + # TODO(felipemonteiro): Use a custom internal exception for this. raise ValueError('The provided jsonpath %s does not begin with "." ' 'or "$"' % jsonpath) - def _execute_replace(path, path_to_change): - if path_to_change: - new_value = value_copy - if pattern: - to_replace = path_to_change[0].value - # `new_value` represents the value to inject into `to_replace` - # that matches the `pattern`. - try: - # A pattern requires us to look up the data located at - # data[jsonpath] and then figure out what - # re.match(data[jsonpath], pattern) is (in pseudocode). - # Raise an exception in case the path isn't present in the - # data and a pattern has been provided since it is - # otherwise impossible to do the look-up. - new_value = re.sub(pattern, str(value_copy), to_replace) - except TypeError as e: - LOG.error('Failed to substitute the value %s into %s ' - 'using pattern %s. Details: %s', str(value_copy), - to_replace, pattern, six.text_type(e)) - raise errors.MissingDocumentPattern(jsonpath=jsonpath, - pattern=pattern) - - return path.update(data_copy, new_value) - # Deckhand should be smart enough to create the nested keys in the # data if they don't exist and a pattern isn't required. 
path = _jsonpath_parse(jsonpath) path_to_change = path.find(data_copy) if not path_to_change: - _execute_data_expansion(jsonpath, data_copy) - path_to_change = path.find(data_copy) - return _execute_replace(path, path_to_change) + _execute_data_expansion(data_copy, jsonpath) + return _execute_replace(data_copy, value_copy, jsonpath, pattern=pattern, + recurse=recurse) def multisort(data, sort_by=None, order_by=None): diff --git a/deckhand/engine/schemas/metadata_document.yaml b/deckhand/engine/schemas/metadata_document.yaml index 57379b12..a64067b4 100644 --- a/deckhand/engine/schemas/metadata_document.yaml +++ b/deckhand/engine/schemas/metadata_document.yaml @@ -27,6 +27,17 @@ data: type: string pattern: type: string + recurse: + type: object + properties: + depth: + type: integer + minimum: -1 + # -1 indicates that the recursion depth is infinite. Refinements + # to this value should be specified by the caller. + default: -1 + required: + - depth additionalProperties: false required: - path diff --git a/deckhand/engine/secrets_manager.py b/deckhand/engine/secrets_manager.py index ba86b0b0..0ef1081f 100644 --- a/deckhand/engine/secrets_manager.py +++ b/deckhand/engine/secrets_manager.py @@ -320,6 +320,7 @@ class SecretsSubstitution(object): for each_dest_path in dest_array: dest_path = each_dest_path['path'] dest_pattern = each_dest_path.get('pattern', None) + dest_recurse = each_dest_path.get('recurse', {}) LOG.debug('Substituting from schema=%s layer=%s name=%s ' 'src_path=%s into dest_path=%s, dest_pattern=%s', @@ -329,8 +330,8 @@ class SecretsSubstitution(object): try: exc_message = '' substituted_data = utils.jsonpath_replace( - document['data'], src_secret, - dest_path, dest_pattern) + document['data'], src_secret, dest_path, + pattern=dest_pattern, recurse=dest_recurse) if (isinstance(document['data'], dict) and isinstance(substituted_data, dict)): document['data'].update(substituted_data) diff --git a/deckhand/tests/unit/common/test_utils.py 
b/deckhand/tests/unit/common/test_utils.py index ac55c3b6..42928de4 100644 --- a/deckhand/tests/unit/common/test_utils.py +++ b/deckhand/tests/unit/common/test_utils.py @@ -57,6 +57,135 @@ class TestJSONPathReplace(test_base.DeckhandTestCase): pattern="REGEX") self.assertEqual(expected, result) + def test_jsonpath_replace_with_pattern_and_array_index(self): + path = ".values.endpoints.admin[1]" + body = {"values": {"endpoints": {"admin": [None, "REGEX_FRESH"]}}} + expected = {"values": {"endpoints": {"admin": [None, "EAT_FRESH"]}}} + result = utils.jsonpath_replace(body, "EAT", jsonpath=path, + pattern="REGEX") + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_dict(self): + path = ".values" + body = {"values": {"re1": "REGEX_ONE", "re2": "REGEX_TWO"}} + expected = {"values": {"re1": "YES_ONE", "re2": "YES_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_list(self): + path = ".values" + + # String entries inside list. + body = {"values": ["REGEX_ONE", "REGEX_TWO"]} + expected = {"values": ["YES_ONE", "YES_TWO"]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + # Dictionary entries inside list. + body = {"values": [{"re1": "REGEX_ONE", "re2": "REGEX_TWO"}]} + expected = {"values": [{"re1": "YES_ONE", "re2": "YES_TWO"}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_str(self): + """Edge case to validate that passing in a path that leads to a string + value itself (not a list or dict) still results in pattern replacement + gracefully passing, even though no recursion is technically possible. 
+ """ + path = ".values.endpoints.admin" + body = {"values": {"endpoints": {"admin": "REGEX_FRESH"}}} + expected = {"values": {"endpoints": {"admin": "EAT_FRESH"}}} + result = utils.jsonpath_replace(body, "EAT", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_dict_nested(self): + path = ".values" + body = {"values": {"re1": "REGEX_ONE", "nested": {"re2": "REGEX_TWO"}}} + expected = {"values": {"re1": "YES_ONE", "nested": {"re2": "YES_TWO"}}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_list_nested(self): + path = ".values" + + # String entry inside nested list. + body = {"values": [{"re1": "REGEX_ONE", "nested": ["REGEX_TWO"]}]} + expected = {"values": [{"re1": "YES_ONE", "nested": ["YES_TWO"]}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + # Dictionary entry inside nested list. 
+ body = {"values": [{"nested": [{"re2": "REGEX_TWO"}]}]} + expected = {"values": [{"nested": [{"re2": "YES_TWO"}]}]} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_pattern_recursive_root_path(self): + """Validate that recursion happens even from root path.""" + path = "$" + body = {"values": {"re1": "REGEX_ONE", "nested": {"re2": "REGEX_TWO"}}} + expected = {"values": {"re1": "YES_ONE", "nested": {"re2": "YES_TWO"}}} + result = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", recurse={'depth': -1}) + self.assertEqual(expected, result) + + def test_jsonpath_replace_with_different_patterns_recursive(self): + """Edge case to validate that different regexes that live recursively + under the same parent path are handled gracefully. Note that + non-matching regexes are obviously skipped over. + """ + path = ".values" + + # Only the first string's pattern will be replaced since it'll match + # REGEX. The second one won't as its pattern is XEGER. + body = {"values": [{"re1": "REGEX_ONE", "nested": ["XEGER_TWO"]}]} + expected = {"values": [{"re1": "YES_ONE", "nested": ["XEGER_TWO"]}]} + result1 = utils.jsonpath_replace(body, "YES", jsonpath=path, + pattern="REGEX", + recurse={'depth': -1}) + self.assertEqual(expected, result1) + + # Now replace the second one by passing in pattern="XEGER". + expected = {"values": [{"re1": "YES_ONE", "nested": ["NO_TWO"]}]} + result2 = utils.jsonpath_replace(result1, "NO", jsonpath=path, + pattern="XEGER", + recurse={'depth': -1}) + self.assertEqual(expected, result2) + + def test_jsonpath_replace_with_recursion_depth_specified(self): + # Only the first string's pattern will be replaced since it'll + # only recurse 1 level. 
+ body = {"re1": "REGEX_ONE", "values": {"re2": "REGEX_TWO"}} + expected = {"re1": "YES_ONE", "values": {"re2": "REGEX_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 1}) + self.assertEqual(expected, result) + + # Depth of 2 should cover both. + body = {"re1": "REGEX_ONE", "values": {"re2": "REGEX_TWO"}} + expected = {"re1": "YES_ONE", "values": {"re2": "YES_TWO"}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 2}) + self.assertEqual(expected, result) + + # Depth of 3 is required as the list around "REGEX_TWO" results in + # another layer of recursion. + body = {"re1": "REGEX_ONE", "values": {"re2": ["REGEX_TWO"]}} + expected = {"re1": "YES_ONE", "values": {"re2": ["YES_TWO"]}} + result = utils.jsonpath_replace(body, "YES", jsonpath="$", + pattern="REGEX", + recurse={'depth': 3}) + self.assertEqual(expected, result) + + class TestJSONPathReplaceNegative(test_base.DeckhandTestCase): """Validate JSONPath replace negative scenarios.""" diff --git a/doc/source/substitution.rst b/doc/source/substitution.rst index d9bacbe2..00020325 100644 --- a/doc/source/substitution.rst +++ b/doc/source/substitution.rst @@ -255,6 +255,49 @@ document) will be: --- schema: armada/Chart/v1 + metadata: + name: example-chart-01 + schema: metadata/Document/v1 + [...] + data: + chart: + details: + data: here + values: + # Notice string replacement occurs at exact location specified by + # ``dest.pattern``. + some_url: http://admin:my-secret-password@service-name:8080/v1 + +Recursive Replacement of Patterns +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Patterns may also be replaced recursively. This can be achieved by +specifying a ``pattern`` value together with a ``recurse`` mapping that sets a +``depth`` (without ``recurse``, no recursion is performed). Best practice is to limit the scope of the recursion +as much as possible: e.g.
avoid passing in "$" as the ``path``, but rather use +a JSON path that lives closer to the nested strings in question. + +.. note:: + + Recursive replacement only affects strings that match ``pattern``. + Strings that do not match are traversed but left unchanged, and no + error is raised for them. + +.. code-block:: yaml + + --- + # Source document. + schema: deckhand/Passphrase/v1 + metadata: + name: example-password + schema: metadata/Document/v1 + layeringDefinition: + layer: site + storagePolicy: cleartext + data: my-secret-password + --- + # Destination document. + schema: armada/Chart/v1 metadata: name: example-chart-01 schema: metadata/Document/v1 @@ -262,12 +305,40 @@ document) will be: layer: region substitutions: - dest: - path: .chart.values.some_url + # Note that the path encapsulates all 3 entries that require pattern + # replacement. + path: .chart.values pattern: INSERT_[A-Z]+_HERE + recurse: + # Note that specifying the depth is mandatory. -1 means that all + # layers are recursed through. + depth: -1 src: schema: deckhand/Passphrase/v1 name: example-password path: . + data: + chart: + details: + data: here + values: + # Notice string replacement occurs for all paths recursively captured + # by dest.path, since all of their values match dest.pattern. + admin_url: http://admin:INSERT_PASSWORD_HERE@service-name:35357/v1 + internal_url: http://internal:INSERT_PASSWORD_HERE@service-name:5000/v1 + public_url: http://public:INSERT_PASSWORD_HERE@service-name:5000/v1 + +After document rendering, the output for ``example-chart-01`` (the destination +document) will be: + +.. code-block:: yaml + + --- + schema: armada/Chart/v1 + metadata: + name: example-chart-01 + schema: metadata/Document/v1 + [...] data: chart: details: @@ -275,7 +346,14 @@ document) will be: values: # Notice how the data from the source document is injected into the # exact location specified by ``dest.pattern``.
- some_url: http://admin:my-secret-password@service-name:8080/v1 + admin_url: http://admin:my-secret-password@service-name:35357/v1 + internal_url: http://internal:my-secret-password@service-name:5000/v1 + public_url: http://public:my-secret-password@service-name:5000/v1 + +Note that the recursion depth must be specified. A depth of -1 removes the +depth limit. Any positive integer specifies how many levels deep to recurse, +which bounds the work done by recursive pattern replacement. Take care to specify a +sufficient recursion depth, or else patterns nested too deeply won't be replaced. Substitution of Encrypted Data ------------------------------