[v2 docs] Overhaul wait API

See the v1-v2 migration guide updates in this commit for details. Change-Id: I6a8a69f8392e8065eda039597278c7dfe593a4fd
2019-02-12 12:51:16 -06:00 · 2019-02-12 12:51:16 -06:00 · 5ffa12fabe
parent 1eb7602d44
commit 5ffa12fabe
7 changed files with 459 additions and 165 deletions
--- a/armada/handlers/chart_deploy.py
+++ b/armada/handlers/chart_deploy.py
@ -64,7 +64,7 @@ class ChartDeploy(object):
        chart_wait = ChartWait(
            self.tiller.k8s,
            release_name,
-            chart,
+            ch,
            namespace,
            k8s_wait_attempts=self.k8s_wait_attempts,
            k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
--- a/armada/handlers/wait.py
+++ b/armada/handlers/wait.py
@ -14,23 +14,26 @@

 from abc import ABC, abstractmethod
 import collections
+import copy
 import math
 import re
 import time

+from kubernetes import watch
 from oslo_log import log as logging

 from armada import const
-from armada.utils.helm import is_test_pod
-from armada.utils.release import label_selectors
 from armada.exceptions import k8s_exceptions
 from armada.exceptions import manifest_exceptions
 from armada.exceptions import armada_exceptions
-from kubernetes import watch
+from armada.handlers.schema import get_schema_info
+from armada.utils.helm import is_test_pod
+from armada.utils.release import label_selectors

 LOG = logging.getLogger(__name__)

 ROLLING_UPDATE_STRATEGY_TYPE = 'RollingUpdate'
+ASYNC_UPDATE_NOT_ALLOWED_MSG = 'Async update not allowed: '


 def get_wait_labels(chart):
@ -46,36 +49,52 @@ class ChartWait():
        self.k8s = k8s
        self.release_name = release_name
        self.chart = chart
-        self.wait_config = chart.get('wait', {})
+        chart_data = self.chart[const.KEYWORD_DATA]
+        self.chart_data = chart_data
+        self.wait_config = self.chart_data.get('wait', {})
        self.namespace = namespace
        self.k8s_wait_attempts = max(k8s_wait_attempts, 1)
        self.k8s_wait_attempt_sleep = max(k8s_wait_attempt_sleep, 1)

-        resources = self.wait_config.get('resources')
-        labels = get_wait_labels(self.chart)
+        schema_info = get_schema_info(self.chart['schema'])

-        if resources is not None:
-            waits = []
-            for resource_config in resources:
-                # Initialize labels
-                resource_config.setdefault('labels', {})
-                # Add base labels
-                resource_config['labels'].update(labels)
-                waits.append(self.get_resource_wait(resource_config))
+        resources = self.wait_config.get('resources')
+        if isinstance(resources, list):
+            # Explicit resource config list provided.
+            resources_list = resources
        else:
-            waits = [
-                JobWait('job', self, labels, skip_if_none_found=True),
-                PodWait('pod', self, labels)
-            ]
-        self.waits = waits
+            # TODO: Remove when v1 doc support is removed.
+            if schema_info.version < 2:
+                resources_list = [{
+                    'type': 'job',
+                    'required': False
+                }, {
+                    'type': 'pod'
+                }]
+            else:
+                resources_list = self.get_resources_list(resources)
+
+        chart_labels = get_wait_labels(self.chart_data)
+        for resource_config in resources_list:
+            # Use chart labels as base labels for each config.
+            labels = dict(chart_labels)
+            resource_labels = resource_config.get('labels', {})
+            # Merge in any resource-specific labels.
+            if resource_labels:
+                labels.update(resource_labels)
+            resource_config['labels'] = labels
+
+        LOG.debug('Resolved `wait.resources` list: %s', resources_list)
+
+        self.waits = [self.get_resource_wait(conf) for conf in resources_list]

        # Calculate timeout
        wait_timeout = timeout
        if wait_timeout is None:
            wait_timeout = self.wait_config.get('timeout')

-        # TODO(MarshM): Deprecated, remove `timeout` key.
-        deprecated_timeout = self.chart.get('timeout')
+        # TODO: Remove when v1 doc support is removed.
+        deprecated_timeout = self.chart_data.get('timeout')
        if deprecated_timeout is not None:
            LOG.warn('The `timeout` key is deprecated and support '
                     'for this will be removed soon. Use '
@ -90,12 +109,19 @@ class ChartWait():

        self.timeout = wait_timeout

+        # Determine whether to enable native wait.
+        native = self.wait_config.get('native', {})
+
+        # TODO: Remove when v1 doc support is removed.
+        default_native = schema_info.version < 2
+
+        self.native_enabled = native.get('enabled', default_native)
+
    def get_timeout(self):
        return self.timeout

    def is_native_enabled(self):
-        native_wait = self.wait_config.get('native', {})
-        return native_wait.get('enabled', True)
+        return self.native_enabled

    def wait(self, timeout):
        deadline = time.time() + timeout
@ -104,6 +130,54 @@ class ChartWait():
            wait.wait(timeout=timeout)
            timeout = int(round(deadline - time.time()))

+    def get_resources_list(self, resources):
+        '''
+        Builds the list of resource configs to wait on.
+
+        Starts from the default config for every supported resource type and
+        merges in any per-type overrides from a `wait.resources` dict.
+
+        :param resources: maps a resource type to a config dict, a list of
+            config dicts, or `False` to disable waiting on that type
+        :returns: list of config dicts (deep copies), each with `type` set
+        '''
+
+        # By default, wait on all supported resource types.
+        resource_order = [
+            # Jobs may perform initialization so add them first.
+            'job',
+            'daemonset',
+            'statefulset',
+            'deployment',
+            'pod'
+        ]
+        # By default, skip if none found so we don't fail on charts which
+        # don't contain resources of a given type.
+        resource_configs = collections.OrderedDict(
+            (kind, {'required': False}) for kind in resource_order)
+
+        # Handle any overrides and/or removals of resource type configs.
+        for resource_type, override in (resources or {}).items():
+            if override is False:
+                # Remove this type; a type with no default config has
+                # nothing to remove, so that must not raise `KeyError`.
+                resource_configs.pop(resource_type, None)
+            else:
+                # Override config for this type.
+                resource_configs[resource_type] = override
+
+        resources_list = []
+        # Convert the resource type map to a list of fully baked resource
+        # configs with type included.
+        for resource_type, config in resource_configs.items():
+            configs = config if isinstance(config, list) else [config]
+            for conf in configs:
+                resource_config = copy.deepcopy(conf)
+                resource_config['type'] = resource_type
+                resources_list.append(resource_config)
+
+        return resources_list
+
    def get_resource_wait(self, resource_config):

        kwargs = dict(resource_config)
@ -138,12 +212,12 @@ class ResourceWait(ABC):
                 chart_wait,
                 labels,
                 get_resources,
-                 skip_if_none_found=False):
+                 required=True):
        self.resource_type = resource_type
        self.chart_wait = chart_wait
        self.label_selector = label_selectors(labels)
        self.get_resources = get_resources
-        self.skip_if_none_found = skip_if_none_found
+        self.required = required

    @abstractmethod
    def is_resource_ready(self, resource):
@ -174,19 +248,19 @@ class ResourceWait(ABC):

    def handle_resource(self, resource):
        resource_name = resource.metadata.name
+        resource_desc = '{} {}'.format(self.resource_type, resource_name)

        try:
            message, resource_ready = self.is_resource_ready(resource)

            if resource_ready:
-                LOG.debug('Resource %s is ready!', resource_name)
+                LOG.debug('%s is ready!', resource_desc)
            else:
-                LOG.debug('Resource %s not ready: %s', resource_name, message)
+                LOG.debug('%s not ready: %s', resource_desc, message)

            return resource_ready
        except armada_exceptions.WaitException as e:
-            LOG.warn('Resource %s unlikely to become ready: %s', resource_name,
-                     e)
+            LOG.warn('%s unlikely to become ready: %s', resource_desc, e)
            return False

    def wait(self, timeout):
@ -194,12 +268,13 @@ class ResourceWait(ABC):
        :param timeout: time before disconnecting ``Watch`` stream
        '''

+        min_ready_msg = ', min_ready={}'.format(
+            self.min_ready.source) if isinstance(self, ControllerWait) else ''
        LOG.info(
-            "Waiting for resource type=%s, namespace=%s labels=%s for %ss "
-            "(k8s wait %s times, sleep %ss)", self.resource_type,
-            self.chart_wait.namespace, self.label_selector, timeout,
-            self.chart_wait.k8s_wait_attempts,
-            self.chart_wait.k8s_wait_attempt_sleep)
+            "Waiting for resource type=%s, namespace=%s labels=%s "
+            "required=%s%s for %ss", self.resource_type,
+            self.chart_wait.namespace, self.label_selector, self.required,
+            min_ready_msg, timeout)
        if not self.label_selector:
            LOG.warn('"label_selector" not specified, waiting with no labels '
                     'may cause unintended consequences.')
@ -207,60 +282,73 @@ class ResourceWait(ABC):
        # Track the overall deadline for timing out during waits
        deadline = time.time() + timeout

-        # NOTE(mark-burnett): Attempt to wait multiple times without
-        # modification, in case new resources appear after our watch exits.
-
-        successes = 0
-        while True:
-            deadline_remaining = int(round(deadline - time.time()))
-            if deadline_remaining <= 0:
-                error = (
-                    "Timed out waiting for resource type={}, namespace={}, "
-                    "labels={}".format(self.resource_type,
-                                       self.chart_wait.namespace,
-                                       self.label_selector))
-                LOG.error(error)
-                raise k8s_exceptions.KubernetesWatchTimeoutException(error)
-
-            timed_out, modified, unready, found_resources = (
-                self._watch_resource_completions(timeout=deadline_remaining))
-
-            if (not found_resources) and self.skip_if_none_found:
-                return
-
-            if timed_out:
-                if not found_resources:
-                    details = (
-                        'None found! Are `wait.labels` correct? Does '
-                        '`wait.resources` need to exclude `type: {}`?'.format(
-                            self.resource_type))
+        schema_info = get_schema_info(self.chart_wait.chart['schema'])
+        # TODO: Remove when v1 doc support is removed.
+        if schema_info.version < 2:
+            # NOTE(mark-burnett): Attempt to wait multiple times without
+            # modification, in case new resources appear after our watch exits.
+            successes = 0
+            while True:
+                modified = self._wait(deadline)
+                if modified is None:
+                    break
+                if modified:
+                    successes = 0
+                    LOG.debug('Found modified resources: %s', sorted(modified))
                else:
-                    details = ('These {}s were not ready={}'.format(
-                        self.resource_type, sorted(unready)))
-                error = (
-                    'Timed out waiting for {}s (namespace={}, labels=({})). {}'
-                    .format(self.resource_type, self.chart_wait.namespace,
-                            self.label_selector, details))
-                LOG.error(error)
-                raise k8s_exceptions.KubernetesWatchTimeoutException(error)
+                    successes += 1
+                    LOG.debug('Found no modified resources.')

-            if modified:
-                successes = 0
-                LOG.debug('Found modified resources: %s', sorted(modified))
+                if successes >= self.chart_wait.k8s_wait_attempts:
+                    return
+
+                LOG.debug(
+                    'Continuing to wait: %s consecutive attempts without '
+                    'modified resources of %s required.', successes,
+                    self.chart_wait.k8s_wait_attempts)
+                time.sleep(self.chart_wait.k8s_wait_attempt_sleep)
+        else:
+            self._wait(deadline)
+
+    def _wait(self, deadline):
+        '''
+        Waits for resources to become ready.
+        Returns whether resources were modified, or `None` if that is to be
+        ignored.
+        '''
+
+        deadline_remaining = int(round(deadline - time.time()))
+        if deadline_remaining <= 0:
+            error = ("Timed out waiting for resource type={}, namespace={}, "
+                     "labels={}".format(self.resource_type,
+                                        self.chart_wait.namespace,
+                                        self.label_selector))
+            LOG.error(error)
+            raise k8s_exceptions.KubernetesWatchTimeoutException(error)
+
+        timed_out, modified, unready, found_resources = (
+            self._watch_resource_completions(timeout=deadline_remaining))
+
+        if (not found_resources) and not self.required:
+            return None
+
+        if timed_out:
+            if not found_resources:
+                details = (
+                    'None found! Are `wait.labels` correct? Does '
+                    '`wait.resources` need to exclude `type: {}`?'.format(
+                        self.resource_type))
            else:
-                successes += 1
-                LOG.debug('Found no modified resources.')
+                details = ('These {}s were not ready={}'.format(
+                    self.resource_type, sorted(unready)))
+            error = (
+                'Timed out waiting for {}s (namespace={}, labels=({})). {}'.
+                format(self.resource_type, self.chart_wait.namespace,
+                       self.label_selector, details))
+            LOG.error(error)
+            raise k8s_exceptions.KubernetesWatchTimeoutException(error)

-            if successes >= self.chart_wait.k8s_wait_attempts:
-                break
-
-            LOG.debug(
-                'Continuing to wait: %s consecutive attempts without '
-                'modified resources of %s required.', successes,
-                self.chart_wait.k8s_wait_attempts)
-            time.sleep(self.chart_wait.k8s_wait_attempt_sleep)
-
-        return True
+        return modified

    def _watch_resource_completions(self, timeout):
        '''
@ -288,8 +376,8 @@ class ResourceWait(ABC):
            if self.include_resource(resource):
                ready[resource.metadata.name] = self.handle_resource(resource)
        if not resource_list.items:
-            if self.skip_if_none_found:
-                msg = 'Skipping wait, no %s resources found.'
+            if not self.required:
+                msg = 'Skipping non-required wait, no %s resources found.'
                LOG.debug(msg, self.resource_type)
                return (False, modified, [], found_resources)
        else:
@ -370,11 +458,19 @@ class PodWait(ResourceWait):
        if is_test_pod(pod):
            return 'helm test pod'

-        # Exclude job pods
-        # TODO: Once controller-based waits are enabled by default, ignore
-        # controller-owned pods as well.
-        if has_owner(pod, 'Job'):
-            return 'generated by job (wait on that instead if not already)'
+        schema_info = get_schema_info(self.chart_wait.chart['schema'])
+        # TODO: Remove when v1 doc support is removed.
+        if schema_info.version < 2:
+            # Exclude job pods
+            if has_owner(pod, 'Job'):
+                return 'owned by job'
+        else:
+            # Exclude all pods with an owner (only include raw pods)
+            # TODO: In helm 3, all resources will likely have the release CR as
+            # an owner, so this will need to be updated to not exclude pods
+            # directly owned by the release.
+            if has_owner(pod):
+                return 'owned by another resource'

        return None

@ -409,7 +505,7 @@ class JobWait(ResourceWait):

        # Exclude cronjob jobs
        if has_owner(job, 'CronJob'):
-            return 'generated by cronjob (not part of release)'
+            return 'owned by cronjob (not part of release)'

        return None

@ -493,10 +589,13 @@ class DeploymentWait(ControllerWait):
        name = deployment.metadata.name
        spec = deployment.spec
        status = deployment.status
-
        gen = deployment.metadata.generation or 0
        observed_gen = status.observed_generation or 0
+
        if gen <= observed_gen:
+            # TODO: Don't fail for lack of progress if `min_ready` is met.
+            # TODO: Consider continuing after `min_ready` is met, so long as
+            # progress is being made.
            cond = self._get_resource_condition(status.conditions,
                                                'Progressing')
            if cond and (cond.reason or '') == 'ProgressDeadlineExceeded':
@ -531,30 +630,42 @@ class DeploymentWait(ControllerWait):

 class DaemonSetWait(ControllerWait):

-    def __init__(self, resource_type, chart_wait, labels, **kwargs):
+    def __init__(self,
+                 resource_type,
+                 chart_wait,
+                 labels,
+                 allow_async_updates=False,
+                 **kwargs):
        super(DaemonSetWait, self).__init__(
            resource_type, chart_wait, labels,
            chart_wait.k8s.apps_v1_api.list_namespaced_daemon_set, **kwargs)

+        self.allow_async_updates = allow_async_updates
+
    def is_resource_ready(self, resource):
        daemon = resource
        name = daemon.metadata.name
        spec = daemon.spec
        status = daemon.status
-
-        if spec.update_strategy.type != ROLLING_UPDATE_STRATEGY_TYPE:
-            msg = ("Assuming non-readiness for strategy type {}, can only "
-                   "determine for {}")
-            raise armada_exceptions.WaitException(
-                msg.format(spec.update_strategy.type,
-                           ROLLING_UPDATE_STRATEGY_TYPE))
-
        gen = daemon.metadata.generation or 0
        observed_gen = status.observed_generation or 0
-        updated_number_scheduled = status.updated_number_scheduled or 0
-        desired_number_scheduled = status.desired_number_scheduled or 0
-        number_available = status.number_available or 0
+
+        if not self.allow_async_updates:
+            is_update = observed_gen > 1
+            if is_update:
+                strategy = spec.update_strategy.type or ''
+                is_rolling = strategy == ROLLING_UPDATE_STRATEGY_TYPE
+                if not is_rolling:
+                    msg = "{}: update strategy type = {}"
+
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG, strategy))
+
        if gen <= observed_gen:
+            updated_number_scheduled = status.updated_number_scheduled or 0
+            desired_number_scheduled = status.desired_number_scheduled or 0
+            number_available = status.number_available or 0
+
            if (updated_number_scheduled < desired_number_scheduled):
                msg = ("Waiting for daemon set {} rollout to finish: {} out "
                       "of {} new pods have been updated...")
@ -578,49 +689,58 @@ class DaemonSetWait(ControllerWait):

 class StatefulSetWait(ControllerWait):

-    def __init__(self, resource_type, chart_wait, labels, **kwargs):
+    def __init__(self,
+                 resource_type,
+                 chart_wait,
+                 labels,
+                 allow_async_updates=False,
+                 **kwargs):
        super(StatefulSetWait, self).__init__(
            resource_type, chart_wait, labels,
            chart_wait.k8s.apps_v1_api.list_namespaced_stateful_set, **kwargs)

+        self.allow_async_updates = allow_async_updates
+
    def is_resource_ready(self, resource):
        sts = resource
        name = sts.metadata.name
        spec = sts.spec
        status = sts.status
-
-        update_strategy_type = spec.update_strategy.type or ''
-        if update_strategy_type != ROLLING_UPDATE_STRATEGY_TYPE:
-            msg = ("Assuming non-readiness for strategy type {}, can only "
-                   "determine for {}")
-
-            raise armada_exceptions.WaitException(
-                msg.format(update_strategy_type, ROLLING_UPDATE_STRATEGY_TYPE))
-
        gen = sts.metadata.generation or 0
        observed_gen = status.observed_generation or 0
-        if (observed_gen == 0 or gen > observed_gen):
-            msg = "Waiting for statefulset spec update to be observed..."
-            return (msg, False)
-
        replicas = spec.replicas or 0
        ready_replicas = status.ready_replicas or 0
        updated_replicas = status.updated_replicas or 0
        current_replicas = status.current_replicas or 0

+        if not self.allow_async_updates:
+            is_update = observed_gen > 1
+            if is_update:
+                strategy = spec.update_strategy.type or ''
+                is_rolling = strategy == ROLLING_UPDATE_STRATEGY_TYPE
+                if not is_rolling:
+                    msg = "{}: update strategy type = {}"
+
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG, strategy))
+
+                if (is_rolling and replicas and
+                        spec.update_strategy.rolling_update.partition):
+                    msg = "{}: partitioned rollout"
+
+                    raise armada_exceptions.WaitException(
+                        msg.format(ASYNC_UPDATE_NOT_ALLOWED_MSG))
+
+        if (observed_gen == 0 or gen > observed_gen):
+            msg = "Waiting for statefulset spec update to be observed..."
+            return (msg, False)
+
        if replicas and not self._is_min_ready(ready_replicas, replicas):
            msg = ("Waiting for statefulset {} rollout to finish: {} of {} "
                   "pods are ready, with min_ready={}")
            return (msg.format(name, ready_replicas, replicas,
                               self.min_ready.source), False)

-        if (update_strategy_type == ROLLING_UPDATE_STRATEGY_TYPE and
-                spec.update_strategy.rolling_update):
-            if replicas and spec.update_strategy.rolling_update.partition:
-                msg = ("Waiting on partitioned rollout not supported, "
-                       "assuming non-readiness of statefulset {}")
-                return (msg.format(name), False)
-
        update_revision = status.update_revision or 0
        current_revision = status.current_revision or 0

--- a/armada/schemas/armada-chart-schema-v2.yaml
+++ b/armada/schemas/armada-chart-schema-v2.yaml
@ -36,6 +36,16 @@ data:
        required:
          - type
        additionalProperties: false
+    wait_resource_type_config:
+      properties:
+        labels:
+          $ref: '#/definitions/labels'
+        min_ready:
+          anyOf:
+            - type: integer
+            - type: string
+        required:
+          type: boolean
  type: object
  properties:
    release:
@ -76,20 +86,22 @@ data:
        timeout:
          type: integer
        resources:
-          type: array
-          items:
-            properties:
-              type:
-                type: string
-              labels:
-                $ref: '#/definitions/labels'
-              min_ready:
+          anyOf:
+            - additionalProperties:
                anyOf:
-                  - type: integer
-                  - type: string
-            required:
-              - type
-            additionalProperties: false
+                  - $ref: '#/definitions/wait_resource_type_config'
+                  - type: array
+                    items:
+                      $ref: '#/definitions/wait_resource_type_config'
+            - type: array
+              items:
+                allOf:
+                  - $ref: '#/definitions/wait_resource_type_config'
+                  - properties:
+                      type:
+                        type: string
+                    required:
+                      - type
        labels:
          $ref: "#/definitions/labels"
        # Config for helm's native `--wait` param.
--- a/armada/tests/unit/handlers/test_armada.py
+++ b/armada/tests/unit/handlers/test_armada.py
@ -140,7 +140,7 @@ data:
    wait:
      timeout: 10
      native:
-        enabled: false
+        enabled: true
    test:
      enabled: true
 """
@ -195,7 +195,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
                                'wait': {
                                    'timeout': 10,
                                    'native': {
-                                        'enabled': False
+                                        'enabled': True
                                    }
                                },
                                'test': {
--- a/armada/tests/unit/handlers/test_wait.py
+++ b/armada/tests/unit/handlers/test_wait.py
@ -24,7 +24,14 @@ test_chart = {'wait': {'timeout': 10, 'native': {'enabled': False}}}

 class ChartWaitTestCase(base.ArmadaTestCase):

-    def get_unit(self, chart, timeout=None):
+    def get_unit(self, chart_data, timeout=None, version=2):
+        chart = {
+            'schema': 'armada/Chart/v{}'.format(str(version)),
+            'metadata': {
+                'name': 'test'
+            },
+            const.KEYWORD_DATA: chart_data
+        }
        return wait.ChartWait(
            k8s=mock.MagicMock(),
            release_name='test-test',
@ -44,7 +51,7 @@ class ChartWaitTestCase(base.ArmadaTestCase):

    def test_get_timeout_override(self):
        unit = self.get_unit(
-            timeout=20, chart={
+            timeout=20, chart_data={
                'timeout': 5,
                'wait': {
                    'timeout': 10
@ -57,9 +64,9 @@ class ChartWaitTestCase(base.ArmadaTestCase):
        unit = self.get_unit({'timeout': 5})
        self.assertEquals(unit.get_timeout(), 5)

-    def test_is_native_enabled_default_true(self):
+    def test_is_native_enabled_default_false(self):
        unit = self.get_unit({})
-        self.assertEquals(unit.is_native_enabled(), True)
+        self.assertEquals(unit.is_native_enabled(), False)

    def test_is_native_enabled_true(self):
        unit = self.get_unit({'wait': {'native': {'enabled': True}}})
@ -188,9 +195,11 @@ class ChartWaitTestCase(base.ArmadaTestCase):

 class PodWaitTestCase(base.ArmadaTestCase):

-    def get_unit(self, labels):
+    def get_unit(self, labels, version=2):
        return wait.PodWait(
-            resource_type='pod', chart_wait=mock.MagicMock(), labels=labels)
+            resource_type='pod',
+            chart_wait=ChartWaitTestCase.get_unit(None, {}, version=version),
+            labels=labels)

    def test_include_resource(self):

@ -223,7 +232,7 @@ class PodWaitTestCase(base.ArmadaTestCase):
            mock_resource(owner_references=[mock.Mock(kind='NotAJob')])
        ]

-        unit = self.get_unit({})
+        unit = self.get_unit({}, version=1)

        # Validate test pods excluded
        for pod in test_pods:
--- a/doc/source/operations/documents/migration-v1-v2.rst
+++ b/doc/source/operations/documents/migration-v1-v2.rst
@ -56,6 +56,68 @@ Chart
 | ``source.subpath``             |                                                            |
 | now optional                   |                                                            |
 +--------------------------------+------------------------------------------------------------+
+| ``wait`` improvements          | See `Wait Improvements`_.                                  |
+--------------------------------+------------------------------------------------------------+
+
+Wait Improvements
+^^^^^^^^^^^^^^^^^
+
+The :ref:`v2 wait API <wait_v2>` includes the following changes.
+
+Breaking changes
+****************
+
+1. ``wait.resources`` now defaults to all supported resource types,
+   currently ``job``, ``daemonset``, ``statefulset``, ``deployment``, and ``pod``, with
+   ``required`` (a new option) set to ``false``. The previous default was
+   the equivalent of pods with ``required=true``, and jobs with
+   ``required=false``.
+
+2. ``type: pod`` waits now exclude pods owned by other resources, such
+   as controllers, as one should instead wait directly on the controller itself,
+   which per 1. is now the default.
+
+3. Waits are no longer retried due to resources having been modified. This was
+   mildly useful before as an indicator of whether all targeted resources were
+   accounted for, but with 1. and 2. above, we are now tracking top-level
+   resources directly included in the release, rather than generated resources,
+   such as controller-owned pods, so there is no need to wait for them to come
+   into existence.
+
+4. ``wait.native.enabled`` is now disabled by default. With the above changes,
+   this is no longer useful as a backup mechanism. Having both enabled leads to
+   ambiguity in which wait would fail in each case. More importantly, this must
+   be disabled in order to use the ``min_ready`` functionality, otherwise tiller
+   will wait for 100% anyway. So this prevents accidentally leaving it enabled
+   in that case. Also, when the tiller native wait timed out, tiller marked the
+   release FAILED, which caused it to be purged and re-installed (unless
+   protected), even though the wait criteria might eventually have succeeded,
+   which armada already validates on a retry.
+
+New features
+************
+
+Per-resource-type overrides
+++++++++++++++++++++++++++
+
+``wait.resources`` can now be a dict, mapping individual resource types to
+wait configurations (or lists thereof), such that one can keep the default
+configuration for the other resource types, and also disable a given resource
+type, by mapping it to ``false``.
+
+The ability to provide the entire explicit list for ``wait.resources`` remains in
+place as well.
+
+required
++++++++
+
+A ``required`` field is also exposed for items/values in ``wait.resources``.
+
+allow_async_updates
+++++++++++++++++++
+
+An ``allow_async_updates`` field is added to daemonset and statefulset type
+items/values in ``wait.resources``.

 ChartGroup
 ----------
--- a/doc/source/operations/documents/v2/document-authoring.rst
+++ b/doc/source/operations/documents/v2/document-authoring.rst
@ -124,6 +124,8 @@ Chart
 | dependencies    | object   | (optional) reference any chart dependencies before install                            |
 +-----------------+----------+---------------------------------------------------------------------------------------+

+.. _wait_v2:
+
 Wait
 ^^^^

@ -132,8 +134,26 @@ Wait
 +=============+==========+====================================================================+
 | timeout     | int      | time (in seconds) to wait for chart to deploy                      |
 +-------------+----------+--------------------------------------------------------------------+
-| resources   | array    | Array of `Wait Resource`_ to wait on, with ``labels`` added to each|
-|             |          | item. Defaults to pods and jobs (if any exist) matching ``labels``.|
+| resources   | dict \|  | `Wait Resource`_ s to wait on. Defaults to all supported resource  |
+|             | array    | types (see `Wait Resource`_ ``.type``), with                       |
+|             |          | ``required: false``.                                               |
+|             |          |                                                                    |
+|             |          | **dict** - Maps resource types to one of:                          |
+|             |          |                                                                    |
+|             |          |   - `Wait Resource`_ without ``.type`` (single config)             |
+|             |          |                                                                    |
+|             |          |   - list of `Wait Resource`_ without ``.type`` (multiple configs)  |
+|             |          |                                                                    |
+|             |          |   - ``false`` (disabled)                                           |
+|             |          |                                                                    |
+|             |          |   Any resource type not overridden retains its default config      |
+|             |          |   mentioned above.                                                 |
+|             |          |                                                                    |
+|             |          | **array** - Lists all `Wait Resource`_ s to use, completely        |
+|             |          | overriding the default. Can be set to ``[]`` to disable all        |
+|             |          | resource types.                                                    |
+|             |          |                                                                    |
+|             |          | See also `Wait Resources Examples`_.                               |
 +-------------+----------+--------------------------------------------------------------------+
 | labels      | object   | Base mapping of labels to wait on. They are added to any labels in |
 |             |          | each item in the ``resources`` array.                              |
@ -143,18 +163,89 @@ Wait

 Wait Resource
 ^^^^^^^^^^^^^
-+-------------+----------+--------------------------------------------------------------------+
-| keyword     | type     | action                                                             |
-+=============+==========+====================================================================+
-| type        | string   | k8s resource type, supports: controllers ('deployment',            |
-|             |          | 'daemonset', 'statefulset'), 'pod', 'job'                          |
-+-------------+----------+--------------------------------------------------------------------+
-| labels      | object   | mapping of kubernetes resource labels                              |
-+-------------+----------+--------------------------------------------------------------------+
-| min\_ready  | int      | Only for controller ``type``s. Amount of pods in a controller      |
-|             | string   | which must be ready. Can be integer or percent string e.g. ``80%``.|
-|             |          | Default ``100%``.                                                  |
-+-------------+----------+--------------------------------------------------------------------+
+
+----------------------------+----------+--------------------------------------------------------------------+
+| keyword                    | type     | action                                                             |
+============================+==========+====================================================================+
+| type                       | string   | K8s resource type, supports: 'deployment', 'daemonset',            |
+|                            |          | 'statefulset', 'pod', 'job'.                                       |
+|                            |          |                                                                    |
+|                            |          | NOTE: Omit when Wait_ ``.resources`` is a dict, as then the dict   |
+|                            |          | key is used instead.                                               |
+----------------------------+----------+--------------------------------------------------------------------+
+| labels                     | object   | Kubernetes labels specific to this resource.                       |
+|                            |          | Wait_ ``.labels`` are included with these, so only define this if  |
+|                            |          | additional labels are needed to identify the targeted resources.   |
+----------------------------+----------+--------------------------------------------------------------------+
+| min\_ready                 | int \|   | Only for controller ``type`` s. Amount of pods in a controller     |
+|                            | string   | which must be ready. Can be integer or percent string e.g. ``80%``.|
+|                            |          | Default ``100%``.                                                  |
+----------------------------+----------+--------------------------------------------------------------------+
+| allow\_async\_updates      | boolean  | Only for ``daemonset`` and ``statefulset`` types. Whether to       |
+|                            |          | wait for async update strategies, i.e. OnDelete or partitioned     |
+|                            |          | RollingUpdate. Defaults to ``false`` in order to fail fast in      |
+|                            |          | cases where the async update is not expected to complete until     |
+|                            |          | some point later on.                                               |
+----------------------------+----------+--------------------------------------------------------------------+
+| required                   | boolean  | Whether to require the resource to be found.                       |
+|                            |          | Defaults to ``true`` for explicit items in ``wait.resources``.     |
+|                            |          | See ``wait.resources`` for its overall defaults.                   |
+----------------------------+----------+--------------------------------------------------------------------+
+
+Wait Resources Examples
+^^^^^^^^^^^^^^^^^^^^^^^
+
+.. code-block:: yaml
+
+    wait:
+      # ...
+      # Disable all waiting.
+      resources: []
+
+.. code-block:: yaml
+
+    wait:
+      # ...
+      # Disable waiting for a given type (job).
+      resources:
+        job: false
+
+.. code-block:: yaml
+
+    wait:
+      # ...
+      # Use min_ready < 100%.
+      resources:
+        daemonset:
+          min_ready: 80%
+
+.. code-block:: yaml
+
+    wait:
+      resources:
+        # Multiple configs for same type.
+        daemonset:
+          - labels:
+              component: one
+            min_ready: 80%
+          - labels:
+              component: two
+            min_ready: 50%
+
+.. code-block:: yaml
+
+    wait:
+      # ...
+      resources:
+        - type: daemonset
+          labels:
+            component: critical
+          min_ready: 100%
+        - type: daemonset
+          labels:
+            component: best_effort
+          min_ready: 80%
+        # ... (re-include any other resource types needed when using list)

 Wait Native
 ^^^^^^^^^^^
@ -164,7 +255,7 @@ Config for the native ``helm (install|upgrade) --wait`` flag.
 +-------------+----------+--------------------------------------------------------------------+
 | keyword     | type     | action                                                             |
 +=============+==========+====================================================================+
-| enabled     | boolean  | defaults to true                                                   |
+| enabled     | boolean  | defaults to ``false``                                              |
 +-------------+----------+--------------------------------------------------------------------+

 .. _test_v2:
@ -187,7 +278,7 @@ Run helm tests on the chart after install/upgrade.
 .. note::

    Armada will attempt to run helm tests by default. They may be disabled by
-    setting the ``enabled`` key to ``False``.
+    setting the ``enabled`` key to ``false``.

 Test Options
 ~~~~~~~~~~~~