From 46b6437e7269c20e4216e61759c26fa2cbe0a8ac Mon Sep 17 00:00:00 2001 From: Matt McEuen Date: Tue, 11 Jun 2019 13:40:23 -0500 Subject: [PATCH] Make static manifest cleanup configurable By design, the anchor pods clean up after their static pods (and associated secrets/configs) via a hook when the anchor pods are stopped, to make sure that cruft is not left lying around (or running) when an anchor pod is no longer scheduled to a host. However, it's been observed that on a host under high load, e.g. if one or two other control plane hosts are down, then the anchor pods may be stopped in an unplanned manner. This results in service unavailability for the anchored static manifest pods. This change makes that cleanup behavior configurable (following the pattern already implemented in the haproxy chart) but leaves it on by default. Change-Id: Iab14510ef8ea5b9e400e0f744231811117029887 --- charts/apiserver/templates/bin/_anchor.tpl | 2 ++ charts/apiserver/values.yaml | 1 + charts/controller_manager/templates/bin/_anchor.tpl | 2 ++ charts/controller_manager/values.yaml | 1 + charts/etcd/templates/bin/_pre_stop.tpl | 3 +++ charts/etcd/values.yaml | 1 + charts/scheduler/templates/bin/_anchor.tpl | 2 ++ charts/scheduler/values.yaml | 1 + 8 files changed, 13 insertions(+) diff --git a/charts/apiserver/templates/bin/_anchor.tpl b/charts/apiserver/templates/bin/_anchor.tpl index 904a4670..ef5d4f6c 100644 --- a/charts/apiserver/templates/bin/_anchor.tpl +++ b/charts/apiserver/templates/bin/_anchor.tpl @@ -65,7 +65,9 @@ snapshot_files "${SNAPSHOT_DIR}" while true; do if [ -e /tmp/stop ]; then echo Stopping + {{- if .Values.anchor.enable_cleanup }} cleanup + {{- end }} break fi diff --git a/charts/apiserver/values.yaml b/charts/apiserver/values.yaml index 4278139e..26d82d0b 100644 --- a/charts/apiserver/values.yaml +++ b/charts/apiserver/values.yaml @@ -77,6 +77,7 @@ labels: anchor: dns_policy: Default + enable_cleanup: true kubelet: manifest_path: /etc/kubernetes/manifests 
period: 15 diff --git a/charts/controller_manager/templates/bin/_anchor.tpl b/charts/controller_manager/templates/bin/_anchor.tpl index c311ffa0..e1d1b612 100644 --- a/charts/controller_manager/templates/bin/_anchor.tpl +++ b/charts/controller_manager/templates/bin/_anchor.tpl @@ -37,7 +37,9 @@ while true; do if [ -e /tmp/stop ]; then echo Stopping + {{- if .Values.anchor.enable_cleanup }} cleanup + {{- end }} break fi diff --git a/charts/controller_manager/values.yaml b/charts/controller_manager/values.yaml index e9050398..29b95f07 100644 --- a/charts/controller_manager/values.yaml +++ b/charts/controller_manager/values.yaml @@ -27,6 +27,7 @@ labels: anchor: dns_policy: Default + enable_cleanup: true kubelet: manifest_path: /etc/kubernetes/manifests period: 15 diff --git a/charts/etcd/templates/bin/_pre_stop.tpl b/charts/etcd/templates/bin/_pre_stop.tpl index ab73b8cd..5346a63f 100644 --- a/charts/etcd/templates/bin/_pre_stop.tpl +++ b/charts/etcd/templates/bin/_pre_stop.tpl @@ -26,6 +26,7 @@ function cleanup_host { # Let the anchor process know it should not try to start the server. 
touch /tmp/stopping +{{- if .Values.anchor.enable_cleanup }} while true; do if etcdctl member list > /tmp/stop_members; then if grep $PEER_ENDPOINT /tmp/stop_members; then @@ -43,3 +44,5 @@ while true; do sleep {{ .Values.anchor.period }} done +{{- end }} +touch /tmp/stopped diff --git a/charts/etcd/values.yaml b/charts/etcd/values.yaml index e6654421..cd1f98f8 100644 --- a/charts/etcd/values.yaml +++ b/charts/etcd/values.yaml @@ -25,6 +25,7 @@ labels: anchor: dns_policy: ClusterFirstWithHostNet + enable_cleanup: true etcdctl_endpoint: example-etcd host_data_path: /var/lib/etcd/example diff --git a/charts/scheduler/templates/bin/_anchor.tpl b/charts/scheduler/templates/bin/_anchor.tpl index 1ae2244c..86e12ea5 100644 --- a/charts/scheduler/templates/bin/_anchor.tpl +++ b/charts/scheduler/templates/bin/_anchor.tpl @@ -36,7 +36,9 @@ cleanup() { while true; do if [ -e /tmp/stop ]; then echo Stopping + {{- if .Values.anchor.enable_cleanup }} cleanup + {{- end }} break fi diff --git a/charts/scheduler/values.yaml b/charts/scheduler/values.yaml index 0cddba7d..3db78be5 100644 --- a/charts/scheduler/values.yaml +++ b/charts/scheduler/values.yaml @@ -2,6 +2,7 @@ release_group: null anchor: dns_policy: Default + enable_cleanup: true kubelet: manifest_path: /etc/kubernetes/manifests period: 15