From 12f448963f4285cd78fcf857563fadf4a7fb4c13 Mon Sep 17 00:00:00 2001 From: Ruslan Aliev Date: Thu, 18 Apr 2024 19:05:18 -0500 Subject: [PATCH] ETCD improvements * remove healthcheck sidecar, perform probes in etcd container itself, failing liveness probes in sidecar do not restart problematic etcd container; * verify that etcdctl member list cmd in anchor is always successful; * adjust ETCDCTL_ENDPOINTS env in etcd container to POD_IP variable instead of localhost (127.0.0.1); * add liveness/readiness probes to auxiliary etcd as well as properly passing etcd configuration variables as strings; * monitor current leader in initial etcd cluster, if an aux member is the current leader pass leadership to a permanent member, the same check applies to the aux suicide process; * etcd aux pod will be alive until all permanent nodes come up and join the cluster plus apiserver no longer relies on aux members; * add a 5 second sleep between aux member removals for a smoother transition process. Signed-off-by: Ruslan Aliev Change-Id: I7918072a6ba5a6b22b359d1616def8c31425462d --- charts/etcd/templates/bin/_etcdctl_anchor.tpl | 6 +- .../templates/etc/_kubernetes-etcd.yaml.tpl | 55 +++++++------------ charts/etcd/values.yaml | 7 --- .../genesis-etcd/server-container.yaml | 34 ++++++++++-- .../manifests/auxiliary-kubernetes-etcd.yaml | 48 +++++++++++++++- 5 files changed, 98 insertions(+), 52 deletions(-) diff --git a/charts/etcd/templates/bin/_etcdctl_anchor.tpl b/charts/etcd/templates/bin/_etcdctl_anchor.tpl index 0562b770..93c1c1e9 100644 --- a/charts/etcd/templates/bin/_etcdctl_anchor.tpl +++ b/charts/etcd/templates/bin/_etcdctl_anchor.tpl @@ -61,6 +61,7 @@ cleanup_host () { firstrun=true saddness_duration=0 while true; do + date # TODO(mark-burnett) Need to monitor a file(s) when shutting down/starting # up so I don't try to take two actions on the node at once. 
{{- if .Values.bootstrapping.enabled }} @@ -117,7 +118,10 @@ while true; do fi echo Successfully added $HOSTNAME to cluster members. # Refresh member list so we start with the right configuration. - etcdctl member list > /tmp/members + if ! etcdctl member list > /tmp/members; then + echo Could not get a member list, trying again. + continue + fi elif grep $PEER_ENDPOINT /tmp/members | grep '\bunstarted\b'; then # This member is in the cluster but not started if [ $saddness_duration -ge {{ .Values.anchor.saddness_threshold }} ] diff --git a/charts/etcd/templates/etc/_kubernetes-etcd.yaml.tpl b/charts/etcd/templates/etc/_kubernetes-etcd.yaml.tpl index 5df4eb10..b75ddecb 100644 --- a/charts/etcd/templates/etc/_kubernetes-etcd.yaml.tpl +++ b/charts/etcd/templates/etc/_kubernetes-etcd.yaml.tpl @@ -18,20 +18,26 @@ {{- define "etcdreadinessProbeTemplate" }} exec: command: - - /bin/sh - - -c - - |- - etcdctl endpoint health - exit $? + - etcdctl + - endpoint + - health +initialDelaySeconds: 10 +timeoutSeconds: 5 +periodSeconds: 10 +successThreshold: 1 +failureThreshold: 3 {{- end }} {{- define "etcdlivenessProbeTemplate" }} exec: command: - - /bin/sh - - -c - - |- - etcdctl endpoint status - exit $? 
+ - etcdctl + - endpoint + - health +initialDelaySeconds: 15 +timeoutSeconds: 5 +periodSeconds: 10 +successThreshold: 1 +failureThreshold: 3 {{- end }} # Strip off "etcd" from service name to get the application name # Note that application can either be kubernetes or calico for now @@ -109,7 +115,7 @@ spec: - name: ETCDCTL_DIAL_TIMEOUT value: 3s - name: ETCDCTL_ENDPOINTS - value: https://127.0.0.1:{{ .Values.network.service_client.target_port }} + value: https://$(POD_IP):{{ .Values.network.service_client.target_port }} - name: ETCDCTL_CACERT value: $(ETCD_TRUSTED_CA_FILE) - name: ETCDCTL_CERT @@ -123,34 +129,11 @@ spec: - name: MANIFEST_PATH value: /manifests/{{ .Values.service.name }}.yaml {{ include "helm-toolkit.utils.to_k8s_env_vars" .Values.pod.env.etcd | indent 8 }} - volumeMounts: - - name: data - mountPath: /var/lib/etcd - - name: etc - mountPath: /etc/etcd - - name: etcd-health-check - image: {{ .Values.images.tags.etcdctl }} - imagePullPolicy: {{ .Values.images.pull_policy }} -{{ tuple $envAll $envAll.Values.pod.resources.etcd_pod_sidecar | include "helm-toolkit.snippets.kubernetes_resources" | indent 6 }} -{{ dict "envAll" $envAll "application" "etcd" "container" "etcd" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 6 }} - env: - - name: ETCDCTL_API - value: "{{ .Values.etcd.etcdctl_api }}" - - name: ETCDCTL_DIAL_TIMEOUT - value: "3s" - - name: ETCDCTL_ENDPOINTS - value: "https://127.0.0.1:{{ .Values.network.service_client.target_port }}" - - name: ETCDCTL_CACERT - value: "/etc/etcd/tls/client-ca.pem" - - name: ETCDCTL_CERT - value: "/etc/etcd/tls/etcd-client.pem" - - name: ETCDCTL_KEY - value: "/etc/etcd/tls/etcd-client-key.pem" - command: ["/bin/sh", "-c", "--"] - args: ["while true; do sleep 30; done;"] {{ dict "envAll" $envAll "component" "etcd" "container" "etcd" "type" "readiness" "probeTemplate" (include "etcdreadinessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 
6 }} {{ dict "envAll" $envAll "component" "etcd" "container" "etcd" "type" "liveness" "probeTemplate" (include "etcdlivenessProbeTemplate" $envAll | fromYaml) | include "helm-toolkit.snippets.kubernetes_probe" | indent 6 }} volumeMounts: + - name: data + mountPath: /var/lib/etcd - name: etc mountPath: /etc/etcd volumes: diff --git a/charts/etcd/values.yaml b/charts/etcd/values.yaml index 739db2c8..389b1b93 100644 --- a/charts/etcd/values.yaml +++ b/charts/etcd/values.yaml @@ -261,13 +261,6 @@ pod: limits: memory: "1024Mi" cpu: "2000m" - etcd_pod_sidecar: - requests: - memory: "128Mi" - cpu: "100m" - limits: - memory: "1024Mi" - cpu: "2000m" test: limits: memory: "128Mi" diff --git a/promenade/templates/include/genesis-etcd/server-container.yaml b/promenade/templates/include/genesis-etcd/server-container.yaml index a8d64078..1ae8b920 100644 --- a/promenade/templates/include/genesis-etcd/server-container.yaml +++ b/promenade/templates/include/genesis-etcd/server-container.yaml @@ -46,7 +46,7 @@ - name: ETCDCTL_DIAL_TIMEOUT value: 3s - name: ETCDCTL_ENDPOINTS - value: https://$(POD_IP):{{ client_port }},https://127.0.0.1:{{ client_port }} + value: https://$(POD_IP):{{ client_port }} - name: ETCDCTL_CACERT value: $(ETCD_TRUSTED_CA_FILE) - name: ETCDCTL_CERT @@ -55,29 +55,51 @@ value: $(ETCD_KEY_FILE) {%- if config['Genesis:etcd.heartbeat_interval'] is defined %} - name: ETCD_HEARTBEAT_INTERVAL - value: {{ config['Genesis:etcd.heartbeat_interval'] }} + value: "{{ config['Genesis:etcd.heartbeat_interval'] }}" {%- endif %} {%- if config['Genesis:etcd.election_timeout'] is defined %} - name: ETCD_ELECTION_TIMEOUT - value: {{ config['Genesis:etcd.election_timeout'] }} + value: "{{ config['Genesis:etcd.election_timeout'] }}" {%- endif %} {%- if config['Genesis:etcd.snapshot_count'] is defined %} - name: ETCD_SNAPSHOT_COUNT - value: {{ config['Genesis:etcd.snapshot_count'] }} + value: "{{ config['Genesis:etcd.snapshot_count'] }}" {%- endif %} {%- if 
config['Genesis:etcd.cipher_suites'] is defined %} - name: ETCD_CIPHER_SUITES - value: {{ config['Genesis:etcd.cipher_suites'] }} + value: "{{ config['Genesis:etcd.cipher_suites'] }}" {%- endif %} {%- if config['Genesis:etcd.gomaxprocs'] is defined %} - name: GOMAXPROCS - value: {{ config['Genesis:etcd.gomaxprocs'] }} + value: "{{ config['Genesis:etcd.gomaxprocs'] }}" {%- endif %} ports: - name: client containerPort: {{ client_port }} - name: peer containerPort: {{ peer_port }} + livenessProbe: + exec: + command: + - etcdctl + - endpoint + - health + initialDelaySeconds: 15 + timeoutSeconds: 5 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 + readinessProbe: + exec: + command: + - etcdctl + - endpoint + - health + initialDelaySeconds: 10 + timeoutSeconds: 5 + periodSeconds: 10 + successThreshold: 1 + failureThreshold: 3 volumeMounts: - name: data-{{ etcd_name }} mountPath: /var/lib/etcd diff --git a/promenade/templates/roles/genesis/etc/kubernetes/manifests/auxiliary-kubernetes-etcd.yaml b/promenade/templates/roles/genesis/etc/kubernetes/manifests/auxiliary-kubernetes-etcd.yaml index 6ee6e770..e090dda7 100644 --- a/promenade/templates/roles/genesis/etc/kubernetes/manifests/auxiliary-kubernetes-etcd.yaml +++ b/promenade/templates/roles/genesis/etc/kubernetes/manifests/auxiliary-kubernetes-etcd.yaml @@ -35,17 +35,61 @@ spec: } remove_if_possible () { - MEMBER_NAME=$1 + MEMBER_NAME="$1" MEMBER_ID=$(etcdctl member list | grep "${MEMBER_NAME}" | awk -F ', ' '{ print $1 }') if [ -n "${MEMBER_ID}" ]; then + if is_leader "$MEMBER_ID"; then + abdicate "$MEMBER_ID" + fi etcdctl member remove $MEMBER_ID + sleep 5 fi } + abdicate () { + OLD_LEADER="$1" + OLD_LEADER_EP=$(etcdctl member list | grep "$OLD_LEADER" | awk -F ', ' '{print $5}') + NEW_LEADER=$(etcdctl member list | grep '\bstarted\b' | grep -Ev "\\b(auxiliary-0|auxiliary-1)\\b" | head -1 | awk -F ', ' '{print $1}') + if [ -n "$NEW_LEADER" ]; then + if ! 
ETCDCTL_ENDPOINTS="$OLD_LEADER_EP" etcdctl move-leader "$NEW_LEADER"; then + echo "Attempted abdication, but failed." + return + fi + sleep 5 + return + fi + } + + abdicate_if_needed () { + AUX_MEMBERS=$(etcdctl member list | grep '\bstarted\b' | grep -E "\\b(auxiliary-0|auxiliary-1)\\b" | awk -F ', ' '{print $1}') + for m in $AUX_MEMBERS; do + if is_leader "$m"; then + abdicate "$m" + fi + done + } + + is_leader () { + MEMBER_ID="$1" + MEMBER_EP=$(etcdctl member list | grep "$MEMBER_ID" | awk -F ', ' '{print $5}') + IS_LEADER=$(ETCDCTL_ENDPOINTS="$MEMBER_EP" etcdctl endpoint status | awk -F ', ' '{ print $5 }') + if [ "$IS_LEADER" = "true" ]; then + return 0 + else + return 1 + fi + } + + aux_endpoint_present () { + awk '/- name: ETCD_ENDPOINTS/{getline; sub(/.*: "/, ""); sub(/".*/, ""); print}' /manifests/kubernetes-apiserver.yaml \ + | grep -E "\\b(12379|22379)\\b" + } + auxiliary_threshold="{{ config.get_first('Genesis:etcd.auxiliary_threshold', default=3) }}" # NOTE(sh8121att): If there are enough (a fully resilient contigent) non-auxiliary members, # then we are ready to remove the auxiliary members. Otherwise, wait. - while [ ! "$(external_member_count)" -ge "$auxiliary_threshold" ]; do + while [ ! "$(external_member_count)" -ge "$auxiliary_threshold" ] || [ "$(aux_endpoint_present)" ]; do + abdicate_if_needed sleep 30 done