diff --git a/charts/proxy/templates/bin/_liveness-probe.sh.tpl b/charts/proxy/templates/bin/_liveness-probe.sh.tpl
new file mode 100644
--- /dev/null
+++ b/charts/proxy/templates/bin/_liveness-probe.sh.tpl
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+# Liveness probe for kube-proxy.
+#
+# Exits 1 when either of the following holds, and 0 otherwise:
+#   * the proxy's built-in HTTP health endpoint on localhost:10256 does not
+#     answer with "200 OK";
+#   * iptables-save lists rules marked "has no endpoints", after dropping
+#     lines that match an entry of .Values.livenessProbe.whitelist.
+#
+# Both checks always run, so the probe output reports every problem found.
+
+set -e
+
+FAILURE=0
+{{- if .Values.livenessProbe.whitelist }}
+WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
+{{- end }}
+
+# Minimal HTTP/1.0 request; the trailing blank line (\r\n\r\n) ends the headers.
+REQUEST='GET /healthz HTTP/1.0\r\nHost: localhost:10256\r\n\r\n'
+
+# Query the endpoint once and reuse the response for the check and for the
+# diagnostics. "|| true" stops "set -e" from aborting the script when socat
+# cannot connect, which would skip the iptables check below.
+RESPONSE=$(printf '%b' "${REQUEST}" | socat - TCP4:localhost:10256 || true)
+
+if [[ "${RESPONSE}" != *'200 OK'* ]]; then
+    echo Failed proxy built-in HTTP health check.
+    printf '%s\n' "${RESPONSE}"
+    FAILURE=1
+fi
+
+# Collect the offending rules once so that the check and the report agree:
+# whitelisted services are excluded from both.
+NO_ENDPOINTS=$(iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep 'has no endpoints' || true)
+
+if [[ -n "${NO_ENDPOINTS}" ]]; then
+    echo Some non-whitelisted services have no endpoints:
+    printf '%s\n' "${NO_ENDPOINTS}"
+    FAILURE=1
+fi
+
+if [[ "${FAILURE}" == "1" ]]; then
+    exit 1
+fi
diff --git a/charts/proxy/templates/bin/_readiness-probe.sh.tpl b/charts/proxy/templates/bin/_readiness-probe.sh.tpl
new file mode 100644
--- /dev/null
+++ b/charts/proxy/templates/bin/_readiness-probe.sh.tpl
@@ -0,0 +1,8 @@
+#!/bin/bash
+#
+# Readiness probe for kube-proxy: succeeds when iptables-save output has a
+# line containing 'default/kubernetes:https', and fails otherwise.
+
+set -e
+
+iptables-save | grep 'default/kubernetes:https'
diff --git a/charts/proxy/templates/configmap-bin.yaml b/charts/proxy/templates/configmap-bin.yaml
new file mode 100644
index 00000000..8b98721c
--- /dev/null
+++ b/charts/proxy/templates/configmap-bin.yaml
@@ -0,0 +1,26 @@
+{{/*
+# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. */}}
+
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: kubernetes-proxy-bin
+data:
+  liveness-probe.sh: |
+{{ tuple "bin/_liveness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+  readiness-probe.sh: |
+{{ tuple "bin/_readiness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
+...
diff --git a/charts/proxy/templates/daemonset.yaml b/charts/proxy/templates/daemonset.yaml
index 1af7df6b..ac77e107 100644
--- a/charts/proxy/templates/daemonset.yaml
+++ b/charts/proxy/templates/daemonset.yaml
@@ -63,24 +63,23 @@ spec:
           - name: KUBERNETES_SERVICE_PORT
             value: {{ .Values.kube_service.port | quote }}
         livenessProbe:
-          httpGet:
-            host: 127.0.0.1
-            path: /healthz
-            port: 10256
-          failureThreshold: 3
-          initialDelaySeconds: 15
-          periodSeconds: 10
-          successThreshold: 1
-          timeoutSeconds: 5
+{{ toYaml .Values.livenessProbe.config | indent 10 }}
+          exec:
+            command:
+              - /tmp/bin/liveness-probe.sh
         readinessProbe:
           exec:
             command:
-              - sh
-              - -c
-              - |-
-                set -ex
-                iptables-save | grep 'default/kubernetes:https'
+              - /tmp/bin/readiness-probe.sh
           initialDelaySeconds: 15
           periodSeconds: 15
+        volumeMounts:
+          - name: bin
+            mountPath: /tmp/bin/
       serviceAccountName: kube-proxy
+      volumes:
+        - name: bin
+          configMap:
+            name: kubernetes-proxy-bin
+            defaultMode: 0555
 {{- end }}
diff --git a/charts/proxy/values.yaml b/charts/proxy/values.yaml
index 233467f8..18d1f2df 100644
--- a/charts/proxy/values.yaml
+++ b/charts/proxy/values.yaml
@@ -55,3 +55,17 @@ network:
 kube_service:
   host: 127.0.0.1
   port: 6553
+
+livenessProbe:
+  config:
+    # NOTE(mark-burnett): To avoid cascading failure modes, it is
+    # important that these values are configured to avoid the possibility
+    # of CrashLoopBackoff for this pod. Otherwise, a small non-impacting
+    # issue could disable kube-proxy for the entire site.
+    failureThreshold: 10
+    initialDelaySeconds: 15
+    periodSeconds: 35
+    successThreshold: 1
+    timeoutSeconds: 10
+  whitelist:
+    # - postgres