From 9573afd3c2117de18225b020fba259e3e82af00c Mon Sep 17 00:00:00 2001 From: Sean Eagan Date: Tue, 4 Jun 2019 15:27:09 -0500 Subject: [PATCH] Prevent tiller from leaving releases in pending status In general, stuck pending statuses can be avoided by not enabling the tiller native wait flag when updating releases, since tiller then marks the release completed directly after applying the resources to kubernetes. However, when updating tiller itself, once kubernetes sees the updated tiller resource, it can bring tiller down before it has a chance to mark the release which contains tiller as completed, leaving it in pending status. This adds a preStop hook to both the standalone and sidecar tiller containers that simply sleeps, giving them a chance to finish updating their release before terminating. Ideally tiller would handle this on its own via signal handling, but it doesn't. We could try to query for the absence of PENDING_*** releases via `helm ls` before exiting; however, the helm CLI is not available inside the tiller image, and those releases could be getting updated from another tiller instance, or could already have gotten stuck in that state previously, in which case we don't want to hold up tiller termination.
Change-Id: I300c613f2a89eb1406531ce0a9af85c429a886f2 --- charts/armada/templates/deployment-api.yaml | 10 ++++++++++ charts/armada/values.yaml | 3 +++ charts/tiller/templates/deployment-tiller.yaml | 10 ++++++++++ charts/tiller/values.yaml | 3 +++ 4 files changed, 26 insertions(+) diff --git a/charts/armada/templates/deployment-api.yaml b/charts/armada/templates/deployment-api.yaml index 23761edd..61ca7a16 100644 --- a/charts/armada/templates/deployment-api.yaml +++ b/charts/armada/templates/deployment-api.yaml @@ -151,6 +151,16 @@ spec: {{- if .Values.conf.tiller.trace }} - -trace {{- end }} + lifecycle: + preStop: + exec: + command: + # Delay tiller termination so that it has a chance to finish + # deploying releases including marking them with + # DEPLOYED/FAILED status, otherwise they can get stuck in + # PENDING_*** status. + - sleep + - "{{ .Values.conf.tiller.prestop_sleep }}" ports: - name: tiller containerPort: {{ .Values.conf.tiller.port }} diff --git a/charts/armada/values.yaml b/charts/armada/values.yaml index 979f3d36..d5d955a6 100644 --- a/charts/armada/values.yaml +++ b/charts/armada/values.yaml @@ -209,6 +209,9 @@ conf: namespace: kube-system # Limit the maximum number of revisions saved per release. 0 for no limit. history_max: 0 + # Note: Defaulting to the (default) kubernetes grace period, as anything + # greater than that will have no effect. + prestop_sleep: 30 pod: env: diff --git a/charts/tiller/templates/deployment-tiller.yaml b/charts/tiller/templates/deployment-tiller.yaml index a7711891..2607f9a0 100644 --- a/charts/tiller/templates/deployment-tiller.yaml +++ b/charts/tiller/templates/deployment-tiller.yaml @@ -77,6 +77,16 @@ spec: {{- if .Values.conf.tiller.trace }} - -trace {{- end }} + lifecycle: + preStop: + exec: + command: + # Delay tiller termination so that it has a chance to finish + # deploying releases including marking them with + # DEPLOYED/FAILED status, otherwise they can get stuck in + # PENDING_*** status. 
+ - sleep + - "{{ .Values.conf.tiller.prestop_sleep }}" ports: - name: tiller containerPort: 44134 diff --git a/charts/tiller/values.yaml b/charts/tiller/values.yaml index e9ab128d..59170ae8 100644 --- a/charts/tiller/values.yaml +++ b/charts/tiller/values.yaml @@ -41,6 +41,9 @@ conf: tiller: verbosity: 5 trace: false + # Note: Defaulting to the (default) kubernetes grace period, as anything + # greater than that will have no effect. + prestop_sleep: 30 pod: resources: