Update image and chart mgmt

NOTE: This has become a monolithic commit to get gate
      settings/scripts in place for CI

- Add Makefile with UCP standard entrypoints
- Move Dockerfile into images/drydock per UCP standards
- Add values.yaml entries for uWSGI threads and workers
- Add environment variables to chart Deployment manifest
  for uWSGI thread and workers
- Add threads and workers specification to uWSGI commandline
  in entrypoint
- Test that the Drydock API is responding
- Test that the Drydock API rejects noauth requests
- Fix Makefile utility script to work behind a proxy

Correct task success voting

Some tasks were incorrectly considered partial_success even when
no failure occurred.

- Network configuration erroneously marked messages as errors
- Update result propagation logic to only use the latest retry

The deploy_nodes task ended as incomplete due to a missing
subtask assignment

Also added a node check step to prepare_nodes so that nodes that
are already under provisioner control (MaaS) are not IPMI-rebooted.

Tangential changes:
- added a config item for the leadership claim interval
- added some debug logging to bootaction_report task
- fix tasks list API endpoint to generate valid JSON

Improve task concurrency

When tasks are started with a scope of multiple nodes,
split the main task so each node is managed independently
to de-link the progression of nodes.

- Split the prepare_nodes task
- Begin reducing cyclomatic complexity to allow for
  better unit testing
- Improved tox testing to include coverage by default
- Include postgresql integration tests in coverage

Closes #73

Change-Id: I600c2a4db74dd42e809bc3ee499fb945ebdf31f6
This commit is contained in:
Scott Hussey 2017-11-17 11:23:16 -06:00
parent 8b31a22c2b
commit ae87cd1714
28 changed files with 866 additions and 251 deletions

83
Makefile Normal file
View File

@ -0,0 +1,83 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# User-tunable knobs; override on the command line, e.g.
#   make IMAGE_TAG=dev USE_PROXY=true images
DRYDOCK_IMAGE_NAME ?= drydock
IMAGE_PREFIX       ?= attcomdev
IMAGE_TAG          ?= latest
HELM               ?= helm
PROXY              ?= http://one.proxy.att.com:8080
USE_PROXY          ?= false

# Export all variables so child processes (tox, helper scripts) see them.
export

.DEFAULT_GOAL := images

# Build all docker images for this project
.PHONY: images
images: build_drydock

# Run an image locally and exercise simple tests
.PHONY: run_images
run_images: run_drydock

# Run tests
.PHONY: tests
tests: coverage_test

# Run unit and Postgres integration tests in coverage mode
.PHONY: coverage_test
coverage_test: build_drydock
	tox -e coverage

# Run the drydock container and exercise simple tests
.PHONY: run_drydock
run_drydock: build_drydock
	tools/drydock_image_run.sh

# Create tgz of the chart
.PHONY: charts
charts: clean
	$(HELM) dep up charts/drydock
	$(HELM) package charts/drydock

# Perform Linting
.PHONY: lint
lint: pep8 helm_lint

# Dry run templating of chart
.PHONY: dry-run
dry-run: clean
	tools/helm_tk.sh $(HELM)
	$(HELM) template charts/drydock

# Make targets intended for use by the primary targets above.

.PHONY: build_drydock
build_drydock:
# NOTE: the ifeq is evaluated at parse time; the proxy build-args are only
# added when USE_PROXY=true so proxy-less environments are unaffected.
ifeq ($(USE_PROXY), true)
	docker build -t $(IMAGE_PREFIX)/$(DRYDOCK_IMAGE_NAME):$(IMAGE_TAG) -f images/drydock/Dockerfile . --build-arg http_proxy=$(PROXY) --build-arg https_proxy=$(PROXY)
else
	docker build -t $(IMAGE_PREFIX)/$(DRYDOCK_IMAGE_NAME):$(IMAGE_TAG) -f images/drydock/Dockerfile .
endif

.PHONY: clean
clean:
	rm -rf build

.PHONY: pep8
pep8:
	tox -e pep8

.PHONY: helm_lint
helm_lint: clean
	tools/helm_tk.sh $(HELM)
	$(HELM) lint charts/drydock

View File

@ -33,6 +33,8 @@ spec:
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }} configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
spec: spec:
restartPolicy: Always restartPolicy: Always
affinity:
{{ tuple $envAll "drydock" "api" | include "helm-toolkit.snippets.kubernetes_pod_anti_affinity" | indent 8 }}
initContainers: initContainers:
{{ tuple $envAll $dependencies "[]" | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }} {{ tuple $envAll $dependencies "[]" | include "helm-toolkit.snippets.kubernetes_entrypoint_init_container" | indent 8 }}
containers: containers:
@ -43,6 +45,10 @@ spec:
secretKeyRef: secretKeyRef:
name: 'maas-api-key' name: 'maas-api-key'
key: 'token' key: 'token'
- name: 'DRYDOCK_API_WORKERS'
value: {{ .Values.conf.uwsgi.workers | default 1 | quote }}
- name: 'DRYDOCK_API_THREADS'
value: {{ .Values.conf.uwsgi.threads | default 1 | quote }}
image: {{ .Values.images.tags.drydock }} image: {{ .Values.images.tags.drydock }}
imagePullPolicy: {{ .Values.images.pull_policy }} imagePullPolicy: {{ .Values.images.pull_policy }}
{{ tuple $envAll $envAll.Values.pod.resources.api | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }} {{ tuple $envAll $envAll.Values.pod.resources.api | include "helm-toolkit.snippets.kubernetes_resources" | indent 10 }}

View File

@ -0,0 +1,37 @@
{{/*
# Copyright (c) 2017 AT&T Intellectual Property. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. */}}
{{/*
Test that the API is up and the health endpoint returns a 2XX code */}}
{{- if .Values.manifests.test_drydock_api }}
---
apiVersion: v1
kind: Pod
metadata:
  name: "{{ .Release.Name }}-api-test"
  annotations:
    # Helm test hook: this pod must exit 0 for `helm test` to pass.
    "helm.sh/hook": "test-success"
spec:
  restartPolicy: Never
  containers:
    - name: "{{ .Release.Name }}-api-test"
      env:
        - name: 'DRYDOCK_URL'
          value: {{ tuple "physicalprovisioner" "internal" "api" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" | quote }}
      image: {{ .Values.images.tags.drydock }}
      imagePullPolicy: {{ .Values.images.pull_policy }}
{{ tuple . .Values.pod.resources.test | include "helm-toolkit.snippets.kubernetes_resources" | indent 8 }}
      # curl --fail makes the container exit non-zero on any non-2XX response.
      command: ["/bin/bash", "-c", "curl -v -X GET --fail ${DRYDOCK_URL}/api/v1.0/health; exit $?"]
...
{{- end }}

View File

@ -0,0 +1,38 @@
{{/*
# Copyright (c) 2017 AT&T Intellectual Property. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. */}}
{{/*
Test to validate that the Drydock API pod rejects unauthenticated
requests */}}
{{- if .Values.manifests.test_drydock_auth }}
---
apiVersion: v1
kind: Pod
metadata:
  name: "{{ .Release.Name }}-auth-test"
  annotations:
    # Helm test hook: this pod is expected to FAIL (the unauthenticated
    # request must be rejected), hence the test-failure hook.
    helm.sh/hook: "test-failure"
spec:
  restartPolicy: Never
  containers:
    - name: "{{ .Release.Name }}-auth-test"
      env:
        - name: 'DRYDOCK_URL'
          value: {{ tuple "physicalprovisioner" "internal" "api" . | include "helm-toolkit.endpoints.host_and_port_endpoint_uri_lookup" | quote }}
      image: {{ .Values.images.tags.drydock }}
      imagePullPolicy: {{ .Values.images.pull_policy }}
{{ tuple . .Values.pod.resources.test | include "helm-toolkit.snippets.kubernetes_resources" | indent 8 }}
      # No auth token is supplied, so the tasks endpoint should return an
      # error status and curl --fail exits non-zero.
      command: ["/bin/bash", "-c", "curl -v -X GET --fail ${DRYDOCK_URL}/api/v1.0/tasks; exit $?"]
...
{{- end }}

View File

@ -46,8 +46,21 @@ pod:
rolling_update: rolling_update:
max_unavailable: 1 max_unavailable: 1
max_surge: 3 max_surge: 3
affinity:
anti:
type:
default: preferredDuringSchedulingIgnoredDuringExecution
topologyKey:
default: kubernetes.io/hostname
resources: resources:
enabled: false enabled: false
test:
limits:
memory: "128Mi"
cpu: "100m"
requests:
memory: "128Mi"
cpu: "100m"
api: api:
limits: limits:
memory: "128Mi" memory: "128Mi"
@ -103,6 +116,8 @@ manifests:
configmap_bin: true configmap_bin: true
service_drydock: true service_drydock: true
deployment_drydock: true deployment_drydock: true
test_drydock_api: true
test_drydock_auth: true
dependencies: dependencies:
db_init: db_init:
@ -220,6 +235,9 @@ database:
# Settings for drydock.conf # Settings for drydock.conf
conf: conf:
uwsgi:
threads: 1
workers: 1
drydock: drydock:
logging: logging:
log_level: 'DEBUG' log_level: 'DEBUG'

View File

@ -54,7 +54,13 @@ class DrydockConfig(object):
'leader_grace_period', 'leader_grace_period',
default=300, default=300,
help= help=
'How long a leader has to check-in before leaderhsip can be usurped, in seconds' 'How long a leader has to check-in before leadership can be usurped, in seconds'
),
cfg.IntOpt(
'leadership_claim_interval',
default=30,
help=
'How often will an instance attempt to claim leadership, in seconds'
), ),
] ]

View File

@ -41,7 +41,7 @@ class TasksResource(StatefulResource):
"""Handler for GET method.""" """Handler for GET method."""
try: try:
task_model_list = self.state_manager.get_tasks() task_model_list = self.state_manager.get_tasks()
task_list = [str(x.to_dict()) for x in task_model_list] task_list = [x.to_dict() for x in task_model_list]
resp.body = json.dumps(task_list) resp.body = json.dumps(task_list)
resp.status = falcon.HTTP_200 resp.status = falcon.HTTP_200
except Exception as ex: except Exception as ex:

View File

@ -224,7 +224,7 @@ class CreateNetworkTemplate(BaseMaasAction):
# Try to true up MaaS definitions of fabrics/vlans/subnets # Try to true up MaaS definitions of fabrics/vlans/subnets
# with the networks defined in Drydock # with the networks defined in Drydock
design_networks = site_design.networks design_networks = list()
design_links = site_design.network_links design_links = site_design.network_links
fabrics = maas_fabric.Fabrics(self.maas_client) fabrics = maas_fabric.Fabrics(self.maas_client)
@ -306,6 +306,7 @@ class CreateNetworkTemplate(BaseMaasAction):
# that VLAN tags and subnet attributes are correct # that VLAN tags and subnet attributes are correct
for net_name in l.allowed_networks: for net_name in l.allowed_networks:
n = site_design.get_network(net_name) n = site_design.get_network(net_name)
design_networks.append(n)
if n is None: if n is None:
continue continue
@ -406,7 +407,7 @@ class CreateNetworkTemplate(BaseMaasAction):
subnet.resource_id, n.name) subnet.resource_id, n.name)
self.task.add_status_msg( self.task.add_status_msg(
msg=msg, msg=msg,
error=True, error=False,
ctx=n.name, ctx=n.name,
ctx_type='network') ctx_type='network')
self.logger.info(msg) self.logger.info(msg)
@ -430,7 +431,7 @@ class CreateNetworkTemplate(BaseMaasAction):
self.logger.debug(msg) self.logger.debug(msg)
self.task.add_status_msg( self.task.add_status_msg(
msg=msg, msg=msg,
error=True, error=False,
ctx=n.name, ctx=n.name,
ctx_type='network') ctx_type='network')
else: else:
@ -442,6 +443,7 @@ class CreateNetworkTemplate(BaseMaasAction):
error=True, error=True,
ctx=n.name, ctx=n.name,
ctx_type='network') ctx_type='network')
self.task.failure(focus=n.name)
# Check if the routes have a default route # Check if the routes have a default route
subnet.gateway_ip = n.get_default_gateway() subnet.gateway_ip = n.get_default_gateway()
@ -527,6 +529,7 @@ class CreateNetworkTemplate(BaseMaasAction):
error=True, error=True,
ctx=n.name, ctx=n.name,
ctx_type='network') ctx_type='network')
self.task.failure(focus=n.name)
elif dhcp_on and vlan.dhcp_on: elif dhcp_on and vlan.dhcp_on:
self.logger.info("DHCP already enabled for subnet %s" % self.logger.info("DHCP already enabled for subnet %s" %
@ -558,9 +561,12 @@ class CreateNetworkTemplate(BaseMaasAction):
msg = "Could not locate destination network for static route to %s." % route_net msg = "Could not locate destination network for static route to %s." % route_net
self.task.add_status_msg( self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='network') msg=msg, error=True, ctx=n.name, ctx_type='network')
self.task.failure(focus=n.name)
self.logger.info(msg) self.logger.info(msg)
continue continue
# now validate that all subnets allowed on a link were created
for n in design_networks: for n in design_networks:
if n.metalabels is not None: if n.metalabels is not None:
# TODO(sh8121att): move metalabels into config # TODO(sh8121att): move metalabels into config
@ -572,13 +578,13 @@ class CreateNetworkTemplate(BaseMaasAction):
exists = subnet_list.singleton({'cidr': n.cidr}) exists = subnet_list.singleton({'cidr': n.cidr})
if exists is not None: if exists is not None:
self.task.success() self.task.success(focus=n.name)
else: else:
msg = "Network %s defined, but not found in MaaS after network config task." % n.name msg = "Network %s defined, but not found in MaaS after network config task." % n.name
self.logger.error(msg) self.logger.error(msg)
self.task.add_status_msg( self.task.add_status_msg(
msg=msg, error=True, ctx=n.name, ctx_type='network') msg=msg, error=True, ctx=n.name, ctx_type='network')
self.task.failure() self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()

View File

@ -155,7 +155,7 @@ class MaasNodeDriver(NodeDriver):
nf = self.orchestrator.create_nodefilter_from_nodelist([n]) nf = self.orchestrator.create_nodefilter_from_nodelist([n])
subtask = self.orchestrator.create_task( subtask = self.orchestrator.create_task(
design_ref=task.design_ref, design_ref=task.design_ref,
action=hd_fields.OrchestratorAction.IdentifyNode, action=task.action,
node_filter=nf, node_filter=nf,
retry=task.retry) retry=task.retry)
task.register_subtask(subtask) task.register_subtask(subtask)

View File

@ -71,7 +71,7 @@ class PyghmiBaseAction(BaseAction):
ipmi_session = self.get_ipmi_session(node) ipmi_session = self.get_ipmi_session(node)
except (IpmiException, errors.DriverError) as iex: except (IpmiException, errors.DriverError) as iex:
self.logger.error("Error initializing IPMI session for node %s" self.logger.error("Error initializing IPMI session for node %s"
% self.node.name) % node.name)
self.logger.debug("IPMI Exception: %s" % str(iex)) self.logger.debug("IPMI Exception: %s" % str(iex))
self.logger.warning( self.logger.warning(
"IPMI command failed, retrying after 15 seconds...") "IPMI command failed, retrying after 15 seconds...")
@ -168,14 +168,14 @@ class SetNodeBoot(PyghmiBaseAction):
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.logger.debug("%s reports bootdev of network" % n.name) self.logger.debug("%s reports bootdev of network" % n.name)
self.task.success() self.task.success(focus=n.name)
else: else:
self.task.add_status_msg( self.task.add_status_msg(
msg="Unable to set bootdev to PXE.", msg="Unable to set bootdev to PXE.",
error=True, error=True,
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.task.failure() self.task.failure(focus=n.name)
self.logger.warning("Unable to set node %s to PXE boot." % self.logger.warning("Unable to set node %s to PXE boot." %
(n.name)) (n.name))
@ -219,7 +219,7 @@ class PowerOffNode(PyghmiBaseAction):
ctx_type='node') ctx_type='node')
self.logger.debug( self.logger.debug(
"Node %s reports powerstate of off" % n.name) "Node %s reports powerstate of off" % n.name)
self.task.success() self.task.success(focus=n.name)
break break
time.sleep(10) time.sleep(10)
i = i - 1 i = i - 1
@ -232,7 +232,7 @@ class PowerOffNode(PyghmiBaseAction):
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.logger.error("Giving up on IPMI command to %s" % n.name) self.logger.error("Giving up on IPMI command to %s" % n.name)
self.task.failure() self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()
@ -274,7 +274,7 @@ class PowerOnNode(PyghmiBaseAction):
error=False, error=False,
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.task.success() self.task.success(focus=n.name)
break break
time.sleep(10) time.sleep(10)
i = i - 1 i = i - 1
@ -287,7 +287,7 @@ class PowerOnNode(PyghmiBaseAction):
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.logger.error("Giving up on IPMI command to %s" % n.name) self.logger.error("Giving up on IPMI command to %s" % n.name)
self.task.failure() self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()
@ -340,7 +340,7 @@ class PowerCycleNode(PyghmiBaseAction):
self.logger.warning( self.logger.warning(
"Failed powering down node %s during power cycle task" % "Failed powering down node %s during power cycle task" %
n.name) n.name)
self.task.failure() self.task.failure(focus=n.name)
break break
self.logger.debug("Sending set_power = on command to %s" % n.name) self.logger.debug("Sending set_power = on command to %s" % n.name)
@ -367,7 +367,7 @@ class PowerCycleNode(PyghmiBaseAction):
error=False, error=False,
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.task.success() self.task.success(focus=n.name)
else: else:
self.task.add_status_msg( self.task.add_status_msg(
msg="Failed to power up during power cycle.", msg="Failed to power up during power cycle.",
@ -377,7 +377,7 @@ class PowerCycleNode(PyghmiBaseAction):
self.logger.warning( self.logger.warning(
"Failed powering up node %s during power cycle task" % "Failed powering up node %s during power cycle task" %
n.name) n.name)
self.task.failure() self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()
@ -400,14 +400,16 @@ class InterrogateOob(PyghmiBaseAction):
try: try:
self.logger.debug( self.logger.debug(
"Interrogating node %s IPMI interface." % n.name) "Interrogating node %s IPMI interface." % n.name)
mci_id = self.exec_ipmi_command(n, Command.get_mci) powerstate = self.exec_ipmi_command(n, Command.get_power)
if powerstate is None:
raise errors.DriverError()
self.task.add_status_msg( self.task.add_status_msg(
msg="IPMI interface interrogation yielded MCI ID %s" % msg="IPMI interface interrogation yielded powerstate %s" %
mci_id, powerstate.get('powerstate'),
error=False, error=False,
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.task.success() self.task.success(focus=n.name)
except errors.DriverError: except errors.DriverError:
self.logger.debug( self.logger.debug(
"Interrogating node %s IPMI interface failed." % n.name) "Interrogating node %s IPMI interface failed." % n.name)
@ -416,7 +418,7 @@ class InterrogateOob(PyghmiBaseAction):
error=True, error=True,
ctx=n.name, ctx=n.name,
ctx_type='node') ctx_type='node')
self.task.failure() self.task.failure(focus=n.name)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()

View File

@ -139,6 +139,7 @@ class PyghmiDriver(oob_driver.OobDriver):
uuid.UUID(bytes=t)), uuid.UUID(bytes=t)),
exc_info=f.exception()) exc_info=f.exception())
task.align_result() task.align_result()
task.bubble_results()
task.set_status(hd_fields.TaskStatus.Complete) task.set_status(hd_fields.TaskStatus.Complete)
task.save() task.save()

View File

@ -17,6 +17,7 @@ import uuid
import json import json
import time import time
import logging import logging
import copy
from datetime import datetime from datetime import datetime
@ -62,7 +63,7 @@ class Task(object):
self.retry = retry self.retry = retry
self.parent_task_id = parent_task_id self.parent_task_id = parent_task_id
self.created = datetime.utcnow() self.created = datetime.utcnow()
self.node_filter = node_filter self.node_filter = copy.deepcopy(node_filter)
self.created_by = None self.created_by = None
self.updated = None self.updated = None
self.terminated = None self.terminated = None
@ -82,7 +83,10 @@ class Task(object):
return self.task_id return self.task_id
def retry_task(self, max_attempts=None): def retry_task(self, max_attempts=None):
"""Check if this task should be retried and update attributes if so.""" """Check if this task should be retried and update attributes if so.
:param max_attempts: The maximum number of retries for this task
"""
if (self.result.status != hd_fields.ActionResult.Success) and (len( if (self.result.status != hd_fields.ActionResult.Success) and (len(
self.result.failures) > 0): self.result.failures) > 0):
if not max_attempts or (max_attempts if not max_attempts or (max_attempts
@ -237,12 +241,16 @@ class Task(object):
nf['filter_set'].append( nf['filter_set'].append(
dict(node_names=self.result.successes, filter_type='union')) dict(node_names=self.result.successes, filter_type='union'))
return nf
def node_filter_from_failures(self): def node_filter_from_failures(self):
"""Create a node filter from failure entities in this task's result.""" """Create a node filter from failure entities in this task's result."""
nf = dict(filter_set_type='intersection', filter_set=[]) nf = dict(filter_set_type='intersection', filter_set=[])
nf['filter_set'].append( nf['filter_set'].append(
dict(node_names=self.result.failures, filter_type='union')) dict(node_names=self.result.failures, filter_type='union'))
return nf
def bubble_results(self, action_filter=None): def bubble_results(self, action_filter=None):
"""Combine successes and failures of subtasks and update this task with the result. """Combine successes and failures of subtasks and update this task with the result.
@ -277,23 +285,32 @@ class Task(object):
else: else:
self.logger.debug("Skipping subtask due to action filter.") self.logger.debug("Skipping subtask due to action filter.")
def align_result(self, action_filter=None): def align_result(self, action_filter=None, reset_status=True):
"""Align the result of this task with the combined results of all the subtasks. """Align the result of this task with the combined results of all the subtasks.
If this task has a retry counter > 0, then failure or partial_success results
of a subtask are only counted if the subtask retry counter is equivalent to this
task.
:param action_filter: string action name to filter subtasks on :param action_filter: string action name to filter subtasks on
:param reset_status: Whether to reset the result status of this task before aligning
""" """
if reset_status:
self.result.status = hd_fields.ActionResult.Incomplete
for st in self.statemgr.get_complete_subtasks(self.task_id): for st in self.statemgr.get_complete_subtasks(self.task_id):
if action_filter is None or (action_filter is not None if action_filter is None or (action_filter is not None
and st.action == action_filter): and st.action == action_filter):
self.logger.debug(
"Collecting result status from subtask %s." % str(st.task_id))
if st.get_result() in [ if st.get_result() in [
hd_fields.ActionResult.Success, hd_fields.ActionResult.Success,
hd_fields.ActionResult.PartialSuccess hd_fields.ActionResult.PartialSuccess
]: ]:
self.success() self.success()
if st.get_result() in [ if (st.get_result() in [
hd_fields.ActionResult.Failure, hd_fields.ActionResult.Failure,
hd_fields.ActionResult.PartialSuccess hd_fields.ActionResult.PartialSuccess
]: ] and (self.retry == 0 or (self.retry == st.retry))):
self.failure() self.failure()
else: else:
self.logger.debug("Skipping subtask due to action filter.") self.logger.debug("Skipping subtask due to action filter.")
@ -363,6 +380,7 @@ class Task(object):
'request_context': 'request_context':
json.dumps(self.request_context.to_dict()) json.dumps(self.request_context.to_dict())
if self.request_context is not None else None, if self.request_context is not None else None,
'node_filter': self.node_filter,
'action': 'action':
self.action, self.action,
'terminated': 'terminated':
@ -449,6 +467,7 @@ class Task(object):
'terminate', 'terminate',
'updated', 'updated',
'retry', 'retry',
'node_filter',
] ]
for f in simple_fields: for f in simple_fields:

View File

@ -58,6 +58,38 @@ class BaseAction(object):
return task_futures return task_futures
def _split_action(self):
"""Start a parallel action for each node in scope.
Start a threaded instance of this action for each node in
scope of this action's task. A subtask will be created for each
action. Returns all a list of concurrent.futures managed
threads running the actions.
:returns: dictionary of subtask_id.bytes => Future instance
"""
target_nodes = self.orchestrator.get_target_nodes(self.task)
if len(target_nodes) > 1:
self.logger.info(
"Found multiple target nodes in task %s, splitting..." % str(
self.task.get_id()))
split_tasks = dict()
with concurrent.futures.ThreadPoolExecutor() as te:
for n in target_nodes:
split_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.PrepareNodes,
node_filter=self.orchestrator.
create_nodefilter_from_nodelist([n]))
self.task.register_subtask(split_task)
action = self.__class__(split_task, self.orchestrator,
self.state_manager)
split_tasks[split_task.get_id().bytes] = te.submit(action.start)
return split_tasks
def _collect_subtask_futures(self, subtask_futures, timeout=300): def _collect_subtask_futures(self, subtask_futures, timeout=300):
"""Collect Futures executing on subtasks or timeout. """Collect Futures executing on subtasks or timeout.
@ -66,10 +98,10 @@ class BaseAction(object):
:param subtask_futures: dictionary of subtask_id.bytes -> Future instance :param subtask_futures: dictionary of subtask_id.bytes -> Future instance
:param timeout: The number of seconds to wait for all Futures to complete :param timeout: The number of seconds to wait for all Futures to complete
:param bubble: Whether to bubble results from collected subtasks
""" """
finished, timed_out = concurrent.futures.wait( finished, timed_out = concurrent.futures.wait(
subtask_futures.values(), timeout=timeout) subtask_futures.values(), timeout=timeout)
self.task.align_result()
for k, v in subtask_futures.items(): for k, v in subtask_futures.items():
if not v.done(): if not v.done():
@ -86,11 +118,16 @@ class BaseAction(object):
"Uncaught excetion in subtask %s future:" % str( "Uncaught excetion in subtask %s future:" % str(
uuid.UUID(bytes=k)), uuid.UUID(bytes=k)),
exc_info=v.exception()) exc_info=v.exception())
st = self.state_manager.get_task(uuid.UUID(bytes=k))
st.bubble_results()
st.align_result()
st.save()
if len(timed_out) > 0: if len(timed_out) > 0:
raise errors.CollectSubtaskTimeout( raise errors.CollectSubtaskTimeout(
"One or more subtask threads did not finish in %d seconds." % "One or more subtask threads did not finish in %d seconds." %
timeout) timeout)
self.task.align_result()
return return
def _load_site_design(self): def _load_site_design(self):
@ -108,6 +145,26 @@ class BaseAction(object):
return site_design return site_design
def _get_driver(self, driver_type, subtype=None):
"""Locate the correct driver instance based on type and subtype.
:param driver_type: The type of driver, 'oob', 'node', or 'network'
:param subtype: In cases where multiple drivers can be active, select
one based on subtype
"""
driver = None
if driver_type == 'oob':
if subtype is None:
return None
for d in self.orchestrator.enabled_drivers['oob']:
if d.oob_type_support(subtype):
driver = d
break
else:
driver = self.orchestrator.enabled_drivers[driver_type]
return driver
class Noop(BaseAction): class Noop(BaseAction):
"""Dummy action to allow the full task completion flow without impacts.""" """Dummy action to allow the full task completion flow without impacts."""
@ -162,7 +219,10 @@ class ValidateDesign(BaseAction):
self.task.design_ref) self.task.design_ref)
self.task.merge_status_messages(task_result=status) self.task.merge_status_messages(task_result=status)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.success() if status.result.status == hd_fields.ActionResult.Success:
self.task.success()
else:
self.task.failure()
self.task.save() self.task.save()
except Exception: except Exception:
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
@ -179,7 +239,7 @@ class VerifySite(BaseAction):
self.task.set_status(hd_fields.TaskStatus.Running) self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save() self.task.save()
node_driver = self.orchestrator.enabled_drivers['node'] node_driver = self._get_driver('node')
if node_driver is None: if node_driver is None:
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
@ -225,7 +285,7 @@ class PrepareSite(BaseAction):
self.task.set_status(hd_fields.TaskStatus.Running) self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save() self.task.save()
driver = self.orchestrator.enabled_drivers['node'] driver = self._get_driver('node')
if driver is None: if driver is None:
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
@ -240,6 +300,19 @@ class PrepareSite(BaseAction):
self.task.save() self.task.save()
return return
self.step_networktemplate(driver)
self.step_usercredentials(driver)
self.task.align_result()
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
def step_networktemplate(self, driver):
"""Run the CreateNetworkTemplate step of this action.
:param driver: The driver instance to use for execution.
"""
site_network_task = self.orchestrator.create_task( site_network_task = self.orchestrator.create_task(
design_ref=self.task.design_ref, design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.CreateNetworkTemplate) action=hd_fields.OrchestratorAction.CreateNetworkTemplate)
@ -259,6 +332,11 @@ class PrepareSite(BaseAction):
self.logger.info("Node driver task %s complete" % self.logger.info("Node driver task %s complete" %
(site_network_task.get_id())) (site_network_task.get_id()))
def step_usercredentials(self, driver):
"""Run the ConfigureUserCredentials step of this action.
:param driver: The driver instance to use for execution.
"""
user_creds_task = self.orchestrator.create_task( user_creds_task = self.orchestrator.create_task(
design_ref=self.task.design_ref, design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.ConfigureUserCredentials) action=hd_fields.OrchestratorAction.ConfigureUserCredentials)
@ -276,12 +354,7 @@ class PrepareSite(BaseAction):
ctx=str(user_creds_task.get_id()), ctx=str(user_creds_task.get_id()),
ctx_type='task') ctx_type='task')
self.logger.info("Node driver task %s complete" % self.logger.info("Node driver task %s complete" %
(site_network_task.get_id())) (user_creds_task.get_id()))
self.task.align_result()
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
class VerifyNodes(BaseAction): class VerifyNodes(BaseAction):
@ -310,11 +383,7 @@ class VerifyNodes(BaseAction):
task_futures = dict() task_futures = dict()
for oob_type, oob_nodes in oob_type_partition.items(): for oob_type, oob_nodes in oob_type_partition.items():
oob_driver = None oob_driver = self._get_driver('oob', oob_type)
for d in self.orchestrator.enabled_drivers['oob']:
if d.oob_type_support(oob_type):
oob_driver = d
break
if oob_driver is None: if oob_driver is None:
self.logger.warning( self.logger.warning(
@ -366,11 +435,7 @@ class PrepareNodes(BaseAction):
self.task.set_status(hd_fields.TaskStatus.Running) self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save() self.task.save()
# NOTE(sh8121att) Should we attempt to interrogate the node via Node node_driver = self._get_driver('node')
# Driver to see if it is in a deployed state before we
# start rebooting? Or do we just leverage
# Drydock internal state via site build data (when implemented)?
node_driver = self.orchestrator.enabled_drivers['node']
if node_driver is None: if node_driver is None:
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
@ -388,129 +453,124 @@ class PrepareNodes(BaseAction):
design_status, site_design = self.orchestrator.get_effective_site( design_status, site_design = self.orchestrator.get_effective_site(
self.task.design_ref) self.task.design_ref)
target_nodes = self.orchestrator.process_node_filter( target_nodes = self.orchestrator.get_target_nodes(self.task)
self.task.node_filter, site_design)
oob_type_partition = {} # Parallelize this task so that node progression is
# not interlinked
if len(target_nodes) > 1:
self.split_action()
else:
check_task_id = self.step_checkexist(node_driver)
node_check_task = self.state_manager.get_task(check_task_id)
for n in target_nodes: # Build the node_filter from the failures of the node_check_task
if n.oob_type not in oob_type_partition.keys(): # as these are the nodes that are unknown to the node provisioner
oob_type_partition[n.oob_type] = [] # and are likely save to be rebooted
oob_type_partition[n.oob_type].append(n) target_nodes = self.orchestrator.process_node_filter(
node_check_task.node_filter_from_failures(), site_design)
task_futures = dict() # And log the nodes that were found so they can be addressed
oob_type_filters = dict()
for oob_type, oob_nodes in oob_type_partition.items(): for n in node_check_task.result.successes:
oob_driver = None self.logger.debug(
for d in self.orchestrator.enabled_drivers['oob']: "Found node %s in provisioner, skipping OOB management." %
if d.oob_type_support(oob_type): (n))
oob_driver = d self.task.add_status_msg(
break msg="Node found in provisioner, skipping OOB management",
error=False,
ctx=n,
ctx_type='node')
if oob_driver is None: self.step_oob_set_netboot(target_nodes)
self.logger.warning(
"Node OOB type %s has no enabled driver." % oob_type)
self.task.failure()
for n in oob_nodes:
self.task.add_status_msg(
msg="Node %s OOB type %s is not supported." %
(n.get_name(), oob_type),
error=True,
ctx=n.get_name(),
ctx_type='node')
continue
oob_type_filters[ # bubble results from the SetNodeBoot task and
oob_type] = self.orchestrator.create_nodefilter_from_nodelist( # continue the execution with successful targets
oob_nodes)
setboot_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.SetNodeBoot,
node_filter=oob_type_filters[oob_type])
self.task.register_subtask(setboot_task)
self.logger.info(
"Starting OOB driver task %s to set PXE boot for OOB type %s" %
(setboot_task.get_id(), oob_type))
task_futures.update(
self._parallelize_subtasks(oob_driver.execute_task,
[setboot_task.get_id()]))
try:
self._collect_subtask_futures(
task_futures,
timeout=(config.config_mgr.conf.timeouts.drydock_timeout * 60))
# Get successful nodes and add it to the node filter
# so the next step only happens for successfully configured nodes
self.task.bubble_results( self.task.bubble_results(
action_filter=hd_fields.OrchestratorAction.SetNodeBoot) action_filter=hd_fields.OrchestratorAction.SetNodeBoot)
for t, f in oob_type_filters.items():
oob_type_filters[t]['filter_set'].append(
dict(
filter_type='union',
node_names=self.task.result.successes))
self.logger.debug(
"Collected subtasks for task %s" % str(self.task.get_id()))
except errors.CollectSubtaskTimeout as ex:
self.logger.warning(str(ex))
task_futures = dict() target_nodes = self.orchestrator.process_node_filter(
self.task.node_filter_from_successes(), site_design)
for oob_type, oob_nodes in oob_type_partition.items(): self.step_oob_powercycle(target_nodes)
oob_driver = None
for d in self.orchestrator.enabled_drivers['oob']:
if d.oob_type_support(oob_type):
oob_driver = d
break
if oob_driver is None: # bubble results from the Powercycle task and
self.logger.warning( # continue the execution with successful targets
"Node OOB type %s has no enabled driver." % oob_type)
self.task.failure()
for n in oob_nodes:
self.task.add_status_msg(
msg="Node %s OOB type %s is not supported." %
(n.get_name(), oob_type),
error=True,
ctx=n.get_name(),
ctx_type='node')
continue
cycle_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.PowerCycleNode,
node_filter=oob_type_filters[oob_type])
self.task.register_subtask(cycle_task)
self.logger.info(
"Starting OOB driver task %s to power cycle nodes for OOB type %s"
% (cycle_task.get_id(), oob_type))
task_futures.update(
self._parallelize_subtasks(oob_driver.execute_task,
[cycle_task.get_id()]))
try:
self._collect_subtask_futures(
task_futures,
timeout=(config.config_mgr.conf.timeouts.drydock_timeout * 60))
# Get successful nodes and add it to the node filter
# so the next step only happens for successfully configured nodes
self.task.bubble_results( self.task.bubble_results(
action_filter=hd_fields.OrchestratorAction.PowerCycleNode) action_filter=hd_fields.OrchestratorAction.PowerCycleNode)
for t, f in oob_type_filters.items():
oob_type_filters[t]['filter_set'].append(
dict(
filter_type='union',
node_names=self.task.result.successes))
self.logger.debug(
"Collected subtasks for task %s" % str(self.task.get_id()))
except errors.CollectSubtaskTimeout as ex:
self.logger.warning(str(ex))
target_nodes = self.orchestrator.process_node_filter(
self.task.node_filter_from_successes(), site_design)
self.step_node_identify(node_driver, target_nodes)
# We can only commission nodes that were successfully identified in the provisioner
self.task.bubble_results(
action_filter=hd_fields.OrchestratorAction.IdentifyNode)
target_nodes = self.orchestrator.process_node_filter(
self.task.node_filter_from_successes(), site_design)
self.step_node_configure_hw(node_driver, target_nodes)
self.task.bubble_results()
self.task.align_result()
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save()
return
def step_node_configure_hw(self, node_driver, node_list):
"""Execute the ConfigureHardware step of this action on a list of nodes.
:param node_driver: driver instance to use for execution
:param node_list: a list of objects.BaremetalNode instances
:return: list of uuid.UUID task ids of the tasks executing this step
"""
if len(node_list) > 0:
self.logger.info(
"Starting hardware configuration task on %d nodes." %
len(node_list))
node_commission_task = None
while True:
if node_commission_task is None:
node_commission_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.ConfigureHardware,
node_filter=self.orchestrator.
create_nodefilter_from_nodelist(node_list))
self.task.register_subtask(node_commission_task)
self.logger.info(
"Starting node driver task %s to commission nodes." %
(node_commission_task.get_id()))
node_driver.execute_task(node_commission_task.get_id())
node_commission_task = self.state_manager.get_task(
node_commission_task.get_id())
try:
if not node_commission_task.retry_task(max_attempts=3):
break
except errors.MaxRetriesReached:
self.task.failure()
break
return [node_commission_task.get_id()]
else:
self.logger.warning(
"No nodes successfully identified, skipping commissioning subtask"
)
return list()
def step_node_identify(self, node_driver, node_list):
"""Execute the IdentifyNode step of this action on a list of nodes.
:param node_driver: driver instance to use for execution
:param node_list: a list of objects.BaremetalNode instances
:return: list of uuid.UUID task ids of the tasks executing this step
"""
# IdentifyNode success will take some time after PowerCycleNode finishes # IdentifyNode success will take some time after PowerCycleNode finishes
# Retry the operation a few times if it fails before considering it a final failure # Retry the operation a few times if it fails before considering it a final failure
# Each attempt is a new task which might make the final task tree a bit confusing # Each attempt is a new task which might make the final task tree a bit confusing
@ -521,7 +581,6 @@ class PrepareNodes(BaseAction):
self.logger.debug( self.logger.debug(
"Will make max of %d attempts to complete the identify_node task." "Will make max of %d attempts to complete the identify_node task."
% max_attempts) % max_attempts)
nf = self.task.node_filter_from_successes()
node_identify_task = None node_identify_task = None
while True: while True:
@ -529,7 +588,8 @@ class PrepareNodes(BaseAction):
node_identify_task = self.orchestrator.create_task( node_identify_task = self.orchestrator.create_task(
design_ref=self.task.design_ref, design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.IdentifyNode, action=hd_fields.OrchestratorAction.IdentifyNode,
node_filter=nf) node_filter=self.orchestrator.
create_nodefilter_from_nodelist(node_list))
self.task.register_subtask(node_identify_task) self.task.register_subtask(node_identify_task)
self.logger.info( self.logger.info(
@ -552,46 +612,155 @@ class PrepareNodes(BaseAction):
self.task.failure() self.task.failure()
break break
# We can only commission nodes that were successfully identified in the provisioner return [node_identify_task.get_id()]
if len(node_identify_task.result.successes) > 0:
target_nf = node_identify_task.node_filter_from_successes() def step_oob_set_netboot(self, node_list):
"""Execute the SetNetBoot step of this action on a list of nodes.
:param node_list: a list of objects.BaremetalNode instances
:return: list of uuid.UUID task ids of the tasks executing this step
"""
oob_type_partition = {}
for n in node_list:
if n.oob_type not in oob_type_partition.keys():
oob_type_partition[n.oob_type] = []
oob_type_partition[n.oob_type].append(n)
task_futures = dict()
for oob_type, oob_nodes in oob_type_partition.items():
oob_driver = self._get_driver('oob', oob_type)
if oob_driver is None:
self.logger.warning(
"Node OOB type %s has no enabled driver." % oob_type)
self.task.failure()
for n in oob_nodes:
self.task.add_status_msg(
msg="Node %s OOB type %s is not supported." %
(n.get_name(), oob_type),
error=True,
ctx=n.get_name(),
ctx_type='node')
continue
setboot_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.SetNodeBoot,
node_filter=self.orchestrator.create_nodefilter_from_nodelist(
oob_nodes))
self.task.register_subtask(setboot_task)
self.logger.info( self.logger.info(
"Found %s successfully identified nodes, starting commissioning." "Starting OOB driver task %s to set PXE boot for OOB type %s" %
% (len(node_identify_task.result.successes))) (setboot_task.get_id(), oob_type))
task_futures.update(
self._parallelize_subtasks(oob_driver.execute_task,
[setboot_task.get_id()]))
node_commission_task = None try:
while True: self._collect_subtask_futures(
if node_commission_task is None: task_futures,
node_commission_task = self.orchestrator.create_task( timeout=(config.config_mgr.conf.timeouts.drydock_timeout * 60))
design_ref=self.task.design_ref, self.logger.debug(
action=hd_fields.OrchestratorAction.ConfigureHardware, "Collected subtasks for task %s" % str(self.task.get_id()))
node_filter=target_nf) except errors.CollectSubtaskTimeout as ex:
self.task.register_subtask(node_commission_task) self.logger.warning(str(ex))
self.logger.info( return [uuid.UUID(bytes=x) for x in task_futures.keys()]
"Starting node driver task %s to commission nodes." %
(node_commission_task.get_id()))
node_driver.execute_task(node_commission_task.get_id()) def step_oob_powercycle(self, node_list):
"""Execute the NodePowerCycle step of this action on a list of nodes.
node_commission_task = self.state_manager.get_task( :param node_list: a list of objects.BaremetalNode instances
node_commission_task.get_id()) :return: list of uuid.UUID task ids of the tasks executing this step
try: """
if not node_commission_task.retry_task(max_attempts=3): oob_type_partition = {}
break
except errors.MaxRetriesReached:
self.task.failure()
break
else: for n in node_list:
self.logger.warning( if n.oob_type not in oob_type_partition.keys():
"No nodes successfully identified, skipping commissioning subtask" oob_type_partition[n.oob_type] = []
)
self.task.align_result() oob_type_partition[n.oob_type].append(n)
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() task_futures = dict()
return
for oob_type, oob_nodes in oob_type_partition.items():
oob_driver = self._get_driver('oob', oob_type)
if oob_driver is None:
self.logger.warning(
"Node OOB type %s has no enabled driver." % oob_type)
self.task.failure()
for n in oob_nodes:
self.task.add_status_msg(
msg="Node %s OOB type %s is not supported." %
(n.get_name(), oob_type),
error=True,
ctx=n.get_name(),
ctx_type='node')
continue
cycle_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.PowerCycleNode,
node_filter=self.orchestrator.create_nodefilter_from_nodelist(
oob_nodes))
self.task.register_subtask(cycle_task)
self.logger.info(
"Starting OOB driver task %s to power cycle nodes for OOB type %s"
% (cycle_task.get_id(), oob_type))
task_futures.update(
self._parallelize_subtasks(oob_driver.execute_task,
[cycle_task.get_id()]))
try:
self._collect_subtask_futures(
task_futures,
timeout=(config.config_mgr.conf.timeouts.drydock_timeout * 60))
self.logger.debug(
"Collected subtasks for task %s" % str(self.task.get_id()))
except errors.CollectSubtaskTimeout as ex:
self.logger.warning(str(ex))
return [uuid.UUID(bytes=x) for x in task_futures.keys()]
def split_action(self):
"""Split this action into independent threads per node."""
action_timeout = config.config_mgr.conf.timeouts.identify_node + \
config.config_mgr.conf.timeouts.configure_hardware
split_tasks = self._split_action()
self._collect_subtask_futures(split_tasks, timeout=action_timeout * 60)
def step_checkexist(self, driver):
"""Query the driver for node existence to prevent impacting deployed nodes.
:param driver: driver instance to use for execution
:returns: uuid.UUID task id of the task that executed this step
"""
node_check_task = self.orchestrator.create_task(
design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.IdentifyNode,
node_filter=self.task.node_filter)
self.logger.info("Starting check task %s before rebooting nodes" %
(node_check_task.get_id()))
task_futures = self._parallelize_subtasks(driver.execute_task,
[node_check_task.get_id()])
self._collect_subtask_futures(
task_futures,
timeout=(config.config_mgr.conf.timeouts.drydock_timeout * 60))
node_check_task = self.state_manager.get_task(node_check_task.get_id())
node_check_task.bubble_results()
node_check_task.save()
return node_check_task.get_id()
class DeployNodes(BaseAction): class DeployNodes(BaseAction):
@ -699,6 +868,7 @@ class DeployNodes(BaseAction):
action=hd_fields.OrchestratorAction.DeployNode, action=hd_fields.OrchestratorAction.DeployNode,
node_filter=node_platform_task. node_filter=node_platform_task.
node_filter_from_successes()) node_filter_from_successes())
self.task.register_subtask(node_deploy_task)
self.logger.info( self.logger.info(
"Starting node driver task %s to deploy nodes." % "Starting node driver task %s to deploy nodes." %
@ -719,20 +889,23 @@ class DeployNodes(BaseAction):
"Unable to configure platform on any nodes, skipping deploy subtask" "Unable to configure platform on any nodes, skipping deploy subtask"
) )
node_deploy_task.bubble_results(
action_filter=hd_fields.OrchestratorAction.DeployNode)
if len(node_deploy_task.result.successes) > 0: if len(node_deploy_task.result.successes) > 0:
node_bootaction_task = self.orchestrator.create_task( node_bootaction_task = self.orchestrator.create_task(
design_ref=self.task.design_ref, design_ref=self.task.design_ref,
action=hd_fields.OrchestratorAction.BootactionReport, action=hd_fields.OrchestratorAction.BootactionReport,
node_filter=node_deploy_task.node_filter_from_successes()) node_filter=node_deploy_task.node_filter_from_successes())
self.task.register_subtask(node_bootaction_task)
action = BootactionReport(node_bootaction_task, self.orchestrator, action = BootactionReport(node_bootaction_task, self.orchestrator,
self.state_manager) self.state_manager)
action.start() action.start()
self.task.bubble_results(
action_filter=hd_fields.OrchestratorAction.BootactionReport)
self.task.align_result()
else:
self.task.bubble_results(
action_filter=hd_fields.OrchestratorAction.DeployNode)
self.task.align_result()
self.task.align_result(
action_filter=hd_fields.OrchestratorAction.BootactionReport)
self.task.set_status(hd_fields.TaskStatus.Complete) self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()
return return
@ -745,16 +918,18 @@ class BootactionReport(BaseAction):
self.task.set_status(hd_fields.TaskStatus.Running) self.task.set_status(hd_fields.TaskStatus.Running)
self.task.save() self.task.save()
poll_start = datetime.utcnow() poll_start = datetime.datetime.utcnow()
still_running = True still_running = True
timeout = datetime.timedelta( timeout = datetime.timedelta(
minutes=config.config_mgr.conf.timeouts.bootaction_final_status) minutes=config.config_mgr.conf.timeouts.bootaction_final_status)
running_time = datetime.utcnow() - poll_start running_time = datetime.datetime.utcnow() - poll_start
nodelist = [ nodelist = [
n.get_id() for n in self.orchestrator.get_target_nodes(self.task) n.get_id() for n in self.orchestrator.get_target_nodes(self.task)
] ]
self.logger.debug(
"Waiting for bootaction response signals to complete.")
while running_time < timeout: while running_time < timeout:
still_running = False still_running = False
for n in nodelist: for n in nodelist:
@ -769,8 +944,14 @@ class BootactionReport(BaseAction):
still_running = True still_running = True
break break
if still_running: if still_running:
self.logger.debug("Still waiting on %d running bootactions." %
len(running_bas))
time.sleep(config.config_mgr.conf.poll_interval) time.sleep(config.config_mgr.conf.poll_interval)
running_time = datetime.utcnow() running_time = datetime.datetime.utcnow() - poll_start
else:
break
self.logger.debug("Signals complete or timeout reached.")
for n in nodelist: for n in nodelist:
bas = self.state_manager.get_boot_actions_for_node(n) bas = self.state_manager.get_boot_actions_for_node(n)
@ -815,5 +996,7 @@ class BootactionReport(BaseAction):
else: else:
self.task.failure(focus=n) self.task.failure(focus=n)
self.logger.debug("Completed collecting bootaction signals.")
self.task.set_status(hd_fields.TaskStatus.Complete)
self.task.save() self.task.save()
return return

View File

@ -130,7 +130,7 @@ class Orchestrator(object):
"Orchestrator %s denied leadership, sleeping to try again." "Orchestrator %s denied leadership, sleeping to try again."
% str(self.orch_id)) % str(self.orch_id))
# TODO(sh8121att) Make this configurable # TODO(sh8121att) Make this configurable
time.sleep(300) time.sleep(config.config_mgr.conf.leadership_claim_interval)
else: else:
self.logger.info( self.logger.info(
"Orchestrator %s successfully claimed leadership, polling for tasks." "Orchestrator %s successfully claimed leadership, polling for tasks."

View File

@ -34,6 +34,7 @@ class Tasks(ExtendTable):
Column('updated', DateTime), Column('updated', DateTime),
Column('design_ref', String(128)), Column('design_ref', String(128)),
Column('request_context', pg.JSON), Column('request_context', pg.JSON),
Column('node_filter', pg.JSON),
Column('action', String(32)), Column('action', String(32)),
Column('terminated', DateTime), Column('terminated', DateTime),
Column('terminated_by', String(16)), Column('terminated_by', String(16)),

View File

@ -3,10 +3,21 @@ set -ex
CMD="drydock" CMD="drydock"
PORT=${PORT:-9000} PORT=${PORT:-9000}
# Number of uWSGI workers to handle API requests
DRYDOCK_API_WORKERS=${DRYDOCK_API_WORKERS:-"1"}
# Threads per worker
DRYDOCK_API_THREADS=${DRYDOCK_API_THREADS:-"4"}
if [ "$1" = 'server' ]; then if [ "$1" = 'server' ]; then
# exec uwsgi --http :${PORT} -w drydock_provisioner.drydock --callable drydock --enable-threads -L --pyargv "--config-file /etc/drydock/drydock.conf" # Run Drydock under uWSGI
exec uwsgi --http :${PORT} --paste config:/etc/drydock/api-paste.ini --enable-threads -L --pyargv "--config-file /etc/drydock/drydock.conf" # --http - Port to listen for requests on
# --paste - Use DeployPaste for handling requests and use the specified config INI file
# --enable-threads - Enable the Python GIL so that service can be multithreaded
# -L - Turn off uWSGI request logging, rely totally on logging within the application
# --pyargs - Provide some command line arguments to the Python executable
# --threads - Number of threads each uWSGI worker should use for handling requests
# --workers - Number of uWSGI workers/processes for handling requests
exec uwsgi --http :${PORT} --paste config:/etc/drydock/api-paste.ini --enable-threads -L --pyargv "--config-file /etc/drydock/drydock.conf" --threads $DRYDOCK_API_THREADS --workers $DRYDOCK_API_WORKERS
fi fi
exec ${CMD} $@ exec ${CMD} $@

View File

@ -0,0 +1,52 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Generic testing for the orchestrator."""
import pytest
import drydock_provisioner.orchestrator.orchestrator as orch
import drydock_provisioner.objects.fields as hd_fields
from drydock_provisioner.orchestrator.actions.orchestrator import PrepareSite
class TestActionPrepareSite(object):
@pytest.mark.skip(reason="test failure fixed in next PS")
def test_preparesite(self, input_files, deckhand_ingester, setup,
drydock_state):
input_file = input_files.join("deckhand_fullsite.yaml")
design_ref = "file://%s" % str(input_file)
# Build a dummy object that looks like an oslo_config object
# so the orchestrator is configured w/ Noop drivers
class DummyConf(object):
oob_driver = list()
node_driver = 'drydock_provisioner.drivers.node.driver.NodeDriver'
network_driver = None
orchestrator = orch.Orchestrator(
enabled_drivers=DummyConf(),
state_manager=drydock_state,
ingester=deckhand_ingester)
task = orchestrator.create_task(
design_ref=design_ref,
action=hd_fields.OrchestratorAction.PrepareSite)
action = PrepareSite(task, orchestrator, drydock_state)
action.start()
task = drydock_state.get_task(task.get_id())
assert task.result.status == hd_fields.ActionResult.Success

View File

@ -66,14 +66,14 @@ class TestClass(object):
assert result.status == falcon.HTTP_403 assert result.status == falcon.HTTP_403
@pytest.fixture() @pytest.fixture()
def seed_bootaction(self, blank_state, test_orchestrator, input_files): def seed_bootaction(self, blank_state, yaml_orchestrator, input_files):
"""Add a task and boot action to the database for testing.""" """Add a task and boot action to the database for testing."""
input_file = input_files.join("fullsite.yaml") input_file = input_files.join("fullsite.yaml")
design_ref = "file://%s" % input_file design_ref = "file://%s" % input_file
test_task = test_orchestrator.create_task( test_task = yaml_orchestrator.create_task(
action=hd_fields.OrchestratorAction.Noop, design_ref=design_ref) action=hd_fields.OrchestratorAction.Noop, design_ref=design_ref)
id_key = test_orchestrator.create_bootaction_context( id_key = yaml_orchestrator.create_bootaction_context(
'compute01', test_task) 'compute01', test_task)
ba_ctx = dict( ba_ctx = dict(
@ -83,10 +83,10 @@ class TestClass(object):
return ba_ctx return ba_ctx
@pytest.fixture() @pytest.fixture()
def falcontest(self, drydock_state, test_ingester, test_orchestrator): def falcontest(self, drydock_state, yaml_ingester, yaml_orchestrator):
"""Create a test harness for the the Falcon API framework.""" """Create a test harness for the the Falcon API framework."""
return testing.TestClient( return testing.TestClient(
start_api( start_api(
state_manager=drydock_state, state_manager=drydock_state,
ingester=test_ingester, ingester=yaml_ingester,
orchestrator=test_orchestrator)) orchestrator=yaml_orchestrator))

View File

@ -93,12 +93,12 @@ class TestClass(object):
assert result.status == falcon.HTTP_400 assert result.status == falcon.HTTP_400
@pytest.fixture() @pytest.fixture()
def seed_bootaction_status(self, blank_state, test_orchestrator, def seed_bootaction_status(self, blank_state, yaml_orchestrator,
input_files): input_files):
"""Add a task and boot action to the database for testing.""" """Add a task and boot action to the database for testing."""
input_file = input_files.join("fullsite.yaml") input_file = input_files.join("fullsite.yaml")
design_ref = "file://%s" % input_file design_ref = "file://%s" % input_file
test_task = test_orchestrator.create_task( test_task = yaml_orchestrator.create_task(
action=hd_fields.OrchestratorAction.Noop, design_ref=design_ref) action=hd_fields.OrchestratorAction.Noop, design_ref=design_ref)
id_key = os.urandom(32) id_key = os.urandom(32)
@ -115,10 +115,10 @@ class TestClass(object):
return ba return ba
@pytest.fixture() @pytest.fixture()
def falcontest(self, drydock_state, test_ingester, test_orchestrator): def falcontest(self, drydock_state, yaml_ingester, yaml_orchestrator):
"""Create a test harness for the the Falcon API framework.""" """Create a test harness for the the Falcon API framework."""
return testing.TestClient( return testing.TestClient(
start_api( start_api(
state_manager=drydock_state, state_manager=drydock_state,
ingester=test_ingester, ingester=yaml_ingester,
orchestrator=test_orchestrator)) orchestrator=yaml_orchestrator))

View File

@ -1,16 +0,0 @@
#!/bin/bash
sudo docker run --rm -dp 5432:5432 --name 'psql_integration' postgres:9.5
sleep 15
psql -h localhost -c "create user drydock with password 'drydock';" postgres postgres
psql -h localhost -c "create database drydock;" postgres postgres
sudo docker run --rm -t --net=host -e DRYDOCK_DB_URL="postgresql+psycopg2://drydock:drydock@localhost:5432/drydock" --entrypoint /usr/local/bin/alembic drydock:latest upgrade head
py.test $1
RESULT=$?
sudo docker stop psql_integration
exit $RESULT

View File

@ -0,0 +1,25 @@
#!/bin/bash
set -x
DRYDOCK_IMAGE="${IMAGE_PREFIX}/${DRYDOCK_IMAGE_NAME}:${IMAGE_TAG}"
if [[ ! -z $(docker ps | grep 'psql_integration') ]]
then
sudo docker stop 'psql_integration'
fi
DRYDOCK_IMAGE=${DRYDOCK_IMAGE:-"drydock:latest"}
if [[ ! -z $(docker ps | grep 'psql_integration') ]]
then
sudo docker stop 'psql_integration'
fi
sudo docker run --rm -dp 5432:5432 --name 'psql_integration' postgres:9.5
sleep 15
docker run --rm --net host postgres:9.5 psql -h localhost -c "create user drydock with password 'drydock';" postgres postgres
docker run --rm --net host postgres:9.5 psql -h localhost -c "create database drydock;" postgres postgres
export DRYDOCK_DB_URL="postgresql+psycopg2://drydock:drydock@localhost:5432/drydock"
sudo docker run --rm -t --net=host -e DRYDOCK_DB_URL="$DRYDOCK_DB_URL" --entrypoint /usr/local/bin/alembic $DRYDOCK_IMAGE upgrade head

View File

@ -0,0 +1,2 @@
#!/bin/bash
sudo docker stop 'psql_integration'

View File

@ -1,10 +0,0 @@
#!/bin/bash
sudo docker run --rm -dp 5432:5432 --name 'psql_integration' postgres:9.5
sleep 15
psql -h localhost -c "create user drydock with password 'drydock';" postgres postgres
psql -h localhost -c "create database drydock;" postgres postgres
sudo docker run --rm -t --net=host -e DRYDOCK_DB_URL="postgresql+psycopg2://drydock:drydock@localhost:5432/drydock" --entrypoint /usr/local/bin/alembic drydock:latest upgrade head
export DRYDOCK_DB_URL="postgresql+psycopg2://drydock:drydock@localhost:5432/drydock"

86
tools/drydock_image_run.sh Executable file
View File

@ -0,0 +1,86 @@
#!/bin/bash
set -x
DRYDOCK_IMAGE="${IMAGE_PREFIX}/${DRYDOCK_IMAGE_NAME}:${IMAGE_TAG}"
function start_db {
if [[ ! -z $(docker ps | grep 'psql_integration') ]]
then
sudo docker stop 'psql_integration'
fi
docker run --rm -dp 5432:5432 --name 'psql_integration' postgres:9.5
sleep 15
docker run --rm --net host postgres:9.5 psql -h localhost -c "create user drydock with password 'drydock';" postgres postgres
docker run --rm --net host postgres:9.5 psql -h localhost -c "create database drydock;" postgres postgres
}
function customize_conf {
default_conffile=$1
custom_conffile=$2
DRYDOCK_DB_URL=$(echo ${DRYDOCK_DB_URL} | sed -e 's/[\/&]/\\&/g')
sed -e "s/^#database_connect_string = <None>/database_connect_string = ${DRYDOCK_DB_URL}/" \
$default_conffile > $custom_conffile
}
function generate_conf {
tox -e genconfig > /dev/null
tox -e genpolicy > /dev/null
ETCDIR=$(mktemp -d)/drydock
mkdir -p ${ETCDIR} > /dev/null
cp etc/drydock/noauth-api-paste.ini ${ETCDIR}/api-paste.ini
customize_conf etc/drydock/drydock.conf.sample ${ETCDIR}/drydock.conf
cp etc/drydock/policy.yaml.sample ${ETCDIR}/policy.yaml
echo ${ETCDIR}
}
function init_db {
docker run --rm -t --net=host \
-e DRYDOCK_DB_URL="${DRYDOCK_DB_URL}" \
--entrypoint /usr/local/bin/alembic \
${DRYDOCK_IMAGE} \
upgrade head
}
function test_drydock {
TMPETC=$1
docker run \
-d --name 'drydock_test' --net host \
-v ${TMPETC}:/etc/drydock \
${DRYDOCK_IMAGE}
RESULT=$(curl --noproxy '*' -i 'http://127.0.0.1:9000/api/v1.0/tasks' | tr '\r' '\n' | head -1)
GOOD="HTTP/1.1 200 OK"
if [[ ${RESULT} == ${GOOD} ]]
then
RC=0
else
RC=1
fi
docker logs drydock_test
return $RC
}
function cleanup {
TMPDIR=$1
docker stop psql_integration
docker stop drydock_test
docker rm drydock_test
# rm -rf $TMPDIR
}
start_db
export DRYDOCK_DB_URL="postgresql+psycopg2://drydock:drydock@localhost:5432/drydock"
init_db
TMPETC=$(generate_conf)
test_drydock $TMPETC
RC=$?
cleanup $TMPETC
exit $RC

65
tools/helm_tk.sh Executable file
View File

@ -0,0 +1,65 @@
#!/bin/bash
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Script to setup helm-toolkit and helm dep up the shipyard chart
#
HELM=$1
HTK_REPO=${HTK_REPO:-"https://github.com/openstack/openstack-helm"}
HTK_PATH=${HTK_PATH:-""}
DEP_UP_LIST=${DEP_UP_LIST:-"drydock"}
if [[ ! -z $(echo $http_proxy) ]]
then
export no_proxy=$no_proxy,127.0.0.1
fi
set -x
function helm_serve {
if [[ -d "$HOME/.helm" ]]; then
echo ".helm directory found"
else
${HELM} init --client-only
fi
if [[ -z $(curl -s 127.0.0.1:8879 | grep 'Helm Repository') ]]; then
${HELM} serve & > /dev/null
while [[ -z $(curl -s 127.0.0.1:8879 | grep 'Helm Repository') ]]; do
sleep 1
echo "Waiting for Helm Repository"
done
else
echo "Helm serve already running"
fi
if ${HELM} repo list | grep -q "^stable" ; then
${HELM} repo remove stable
fi
${HELM} repo add local http://localhost:8879/charts
}
mkdir -p build
pushd build
git clone --depth 1 $HTK_REPO || true
pushd openstack-helm/$HTK_PATH
git pull
helm_serve
make helm-toolkit
popd && popd
for c in $DEP_UP_LIST
do
${HELM} dep up charts/$c
done

26
tox.ini
View File

@ -1,5 +1,5 @@
[tox] [tox]
envlist = coverage-unit,pep8,bandit envlist = pep8,bandit,unit
[testenv] [testenv]
setenv = YAMLDIR = {toxinidir}/tests/yaml_samples/ setenv = YAMLDIR = {toxinidir}/tests/yaml_samples/
@ -26,14 +26,6 @@ commands=
find {toxinidir}/drydock_provisioner -name '__init__.py' -exec yapf -i --style=pep8 \{\} ; find {toxinidir}/drydock_provisioner -name '__init__.py' -exec yapf -i --style=pep8 \{\} ;
[testenv:unit] [testenv:unit]
setenv=
PYTHONWARNING=all
YAMLDIR={toxinidir}/tests/yaml_samples/
commands=
py.test \
tests/unit/{posargs}
[testenv:coverage-unit]
usedevelop=True usedevelop=True
setenv= setenv=
PYTHONWARNING=all PYTHONWARNING=all
@ -41,18 +33,26 @@ setenv=
commands= commands=
py.test \ py.test \
--cov=drydock_provisioner \ --cov=drydock_provisioner \
tests/unit/{posargs} {toxinidir}/tests/unit/{posargs}
[testenv:integration] [testenv:integration]
passenv=DRYDOCK_IMAGE_NAME IMAGE_PREFIX IMAGE_TAG
setenv= setenv=
PYTHONWARNING=all PYTHONWARNING=all
commands= commands=
{toxinidir}/tests/postgres/start_postgres.sh
py.test \ py.test \
tests/integration/{posargs} {toxinidir}/tests/integration/postgres/{posargs}
[testenv:postgres] [testenv:coverage]
usedevelop=True
passenv=DRYDOCK_IMAGE_NAME IMAGE_PREFIX IMAGE_TAG
setenv=
YAMLDIR={toxinidir}/tests/yaml_samples/
commands= commands=
{toxinidir}/tests/integration/postgres/test_postgres.sh {toxinidir}/tests/integration/postgres/{posargs} {toxinidir}/tests/postgres/start_postgres.sh
py.test --cov=drydock_provisioner \
{toxinidir}/tests/unit/ {toxinidir}/tests/integration/postgres
[testenv:genconfig] [testenv:genconfig]
commands = oslo-config-generator --config-file=etc/drydock/drydock-config-generator.conf commands = oslo-config-generator --config-file=etc/drydock/drydock-config-generator.conf