Merge "Make K8S proxy health check more aggressive"

This commit is contained in:
Zuul 2018-09-11 20:16:59 +00:00 committed by Gerrit Code Review
commit 018496fd18
5 changed files with 84 additions and 14 deletions

View File

@ -0,0 +1,26 @@
#!/bin/bash
# Liveness probe for kube-proxy.
#
# Runs two independent checks and exits 1 if either of them fails:
#   1. The proxy's built-in HTTP health endpoint on localhost:10256 must
#      answer with "200 OK".
#   2. The iptables-save output must not contain any "has no endpoints"
#      line, ignoring lines that match an entry of the optional
#      livenessProbe.whitelist value.
# Both checks are always evaluated so that a single probe run reports every
# problem it can see.
set -e
FAILURE=0
{{- if .Values.livenessProbe.whitelist }}
# Extended regex built from the whitelist entries; iptables-save lines that
# match it are dropped before looking for services without endpoints.
WHITELIST='({{- join "|" .Values.livenessProbe.whitelist -}})'
{{- end }}
# Raw HTTP request; the \r\n escapes are expanded by "echo -e" below.
REQUEST='GET /healthz HTTP/1.0\r\nHost: localhost:10256\r\n'
if [[ $(echo -e "${REQUEST}" | socat - TCP4:localhost:10256 | grep -sc '200 OK') -lt 1 ]]; then
  echo Failed proxy built-in HTTP health check.
  # Re-run the request only to show the raw response. This is diagnostic
  # output, so a socat failure must not trip "set -e" and skip the
  # iptables check below.
  echo -e "${REQUEST}" | socat - TCP4:localhost:10256 || true
  FAILURE=1
fi
if [[ $(iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep -sc 'has no endpoints') -gt 0 ]]; then
  echo Some non-whitelisted services have no endpoints:
  # Apply the same whitelist filter as the check above so that only the
  # offending services are listed. The listing is diagnostic only, hence
  # the "|| true" guard against "set -e".
  iptables-save {{- if .Values.livenessProbe.whitelist }} | grep -Ev "${WHITELIST}" {{- end }} | grep 'has no endpoints' || true
  FAILURE=1
fi
if [[ "${FAILURE}" == "1" ]]; then
  exit 1
fi

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Readiness probe for kube-proxy.
#
# Succeeds only when the iptables-save output contains a line mentioning the
# default/kubernetes:https service; the exit status is that of grep.
set -e

# Text that must appear in the iptables-save output.
RULE_MARKER='default/kubernetes:https'

iptables-save | grep "${RULE_MARKER}"

View File

@ -0,0 +1,26 @@
{{/*
# Copyright (c) 2018 AT&T Intellectual Property. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License. */}}
---
# ConfigMap holding the kube-proxy probe scripts. The DaemonSet references it
# by name as the "bin" volume mounted at /tmp/bin/, where the liveness and
# readiness probes execute the scripts.
apiVersion: v1
kind: ConfigMap
metadata:
name: kubernetes-proxy-bin
data:
# Each key below becomes one script file. Its content is the named template
# from bin/, passed through the helm-toolkit.utils.template include and
# indented four spaces to sit inside the YAML block scalar.
liveness-probe.sh: |
{{ tuple "bin/_liveness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
readiness-probe.sh: |
{{ tuple "bin/_readiness-probe.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
...

View File

@ -63,24 +63,23 @@ spec:
- name: KUBERNETES_SERVICE_PORT
value: {{ .Values.kube_service.port | quote }}
livenessProbe:
httpGet:
host: 127.0.0.1
path: /healthz
port: 10256
failureThreshold: 3
initialDelaySeconds: 15
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
{{ toYaml .Values.livenessProbe.config | indent 10 }}
exec:
command:
- /tmp/bin/liveness-probe.sh
readinessProbe:
exec:
command:
- sh
- -c
- |-
set -ex
iptables-save | grep 'default/kubernetes:https'
- /tmp/bin/readiness-probe.sh
initialDelaySeconds: 15
periodSeconds: 15
volumeMounts:
- name: bin
mountPath: /tmp/bin/
serviceAccountName: kube-proxy
volumes:
- name: bin
configMap:
name: kubernetes-proxy-bin
defaultMode: 0555
{{- end }}

View File

@ -55,3 +55,17 @@ network:
kube_service:
host: 127.0.0.1
port: 6553
livenessProbe:
# Probe settings rendered with toYaml into the kube-proxy container's
# livenessProbe section of the DaemonSet.
config:
# NOTE(mark-burnett): To prevent cascading failures, these values must be
# chosen so that this pod cannot enter CrashLoopBackOff. Otherwise, a
# minor, non-impacting issue could disable kube-proxy for the entire
# site.
failureThreshold: 10
initialDelaySeconds: 15
periodSeconds: 35
successThreshold: 1
timeoutSeconds: 10
# Optional list of patterns. The liveness probe script joins the entries with
# "|" into a single extended regex and drops every iptables-save line that
# matches it (grep -Ev) before checking for "has no endpoints" lines.
whitelist:
# - postgres