Implements: etcd backup

This is an effort to implement etcd backup.
This will create a k8s cron job to take a regular backup.

Change-Id: If2c89ac01540c0f13f9b57a6833a8ea770379717
Signed-off-by: Sreejith Punnapuzha <Sreejith.Punnapuzha@outlook.com>
This commit is contained in:
Sreejith Punnapuzha 2018-09-06 12:29:16 -05:00
parent 07e2908777
commit b65752d7c5
4 changed files with 214 additions and 0 deletions

View File

@ -0,0 +1,61 @@
#!/bin/sh
{{/*
Copyright 2017 AT&T Intellectual Property. All other rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
# Abort on the first failing command ...
set -e
# ... and trace every command as it is executed.
set -x
# Skip flag; the pre-flight checks further down set it to 1 when a
# prerequisite is missing.
SKIP_BACKUP=0
# Retention count and log destination are rendered from the chart values.
NUM_TO_KEEP={{ .Values.backup.no_backup_keep | quote }}
BACKUP_LOG={{ .Values.backup.backup_log_file | quote }}
# Directory the snapshots and archives are written to.
BACKUP_DIR="/var/lib/etcd/backup"
# etcdbackup: save an etcd snapshot into $BACKUP_DIR, compress it, then delete
# the oldest entries in $BACKUP_DIR until at most $NUM_TO_KEEP remain.
#
# Reads:  BACKUP_DIR, BACKUP_LOG, NUM_TO_KEEP (assigned earlier in this script).
# Exits:  with etcdctl's return code when the snapshot cannot be taken.
etcdbackup() {
    # Build the snapshot name once so the archive step works on exactly the
    # file that was just written instead of re-discovering it through `ls -t`.
    SNAPSHOT_NAME="etcd-backup-$(date +"%m-%d-%Y-%H-%M-%S").db"

    # The script runs under `set -e`: a bare failing etcdctl would abort the
    # script before the return code could be reported. Capturing the status
    # through `||` keeps control in this function.
    BACKUP_RETURN_CODE=0
    etcdctl snapshot save "$BACKUP_DIR/$SNAPSHOT_NAME" >> "$BACKUP_LOG" || BACKUP_RETURN_CODE=$?
    # POSIX `[ ]` rather than `[[ ]]`, because the interpreter is /bin/sh.
    if [ "$BACKUP_RETURN_CODE" -ne 0 ]; then
        echo "There was an error backing up the databases. Return code was $BACKUP_RETURN_CODE."
        exit "$BACKUP_RETURN_CODE"
    fi

    LATEST_BACKUP="$SNAPSHOT_NAME"
    echo "Archiving $LATEST_BACKUP..."
    cd "$BACKUP_DIR"
    tar -czf "$BACKUP_DIR/$LATEST_BACKUP.tar.gz" "$LATEST_BACKUP"
    # Only the compressed archive is kept; drop the raw snapshot.
    rm -rf "$LATEST_BACKUP"

    echo "Clearing earliest backups..."
    # Count the entries inside the directory. `ls -ld` prints a single line
    # for the directory itself, so the count never exceeded 1 and nothing was
    # ever pruned.
    NUM_LOCAL_BACKUPS=$(ls -1 "$BACKUP_DIR" | wc -l)
    while [ "$NUM_LOCAL_BACKUPS" -gt "$NUM_TO_KEEP" ]
    do
        EARLIEST_BACKUP=$(ls -tr "$BACKUP_DIR" | head -1)
        # Guard: never call rm on the bare directory path if nothing is listed.
        if [ -z "$EARLIEST_BACKUP" ]; then
            break
        fi
        echo "Deleting $EARLIEST_BACKUP..."
        rm -rf "$BACKUP_DIR/$EARLIEST_BACKUP"
        NUM_LOCAL_BACKUPS=$(ls -1 "$BACKUP_DIR" | wc -l)
    done
}
# Pre-flight checks: each missing prerequisite is reported and flags the run
# to be skipped, so all problems are printed before the script gives up.

# `command -v` is the POSIX way to look a command up; `which` is an external
# tool that is not guaranteed to be present in the image.
if ! command -v etcdctl >/dev/null 2>&1; then
    echo "ERROR: etcdctl not available, Please use the correct image."
    SKIP_BACKUP=1
fi
if [ ! -d "$BACKUP_DIR" ]; then
    echo "ERROR: $BACKUP_DIR doesn't exist, Backup will not continue"
    SKIP_BACKUP=1
fi

# `=` is the POSIX string comparison for `[ ]`; `==` is rejected by some
# /bin/sh implementations, which would wrongly send the run to the else branch.
if [ "$SKIP_BACKUP" = '0' ]; then
    etcdbackup
else
    echo "Error: etcd backup failed."
    exit 1
fi

View File

@ -29,4 +29,6 @@ data:
{{ tuple "bin/_pre_stop.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
readiness: |+
{{ tuple "bin/_readiness.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
etcdbackup: |+
{{ tuple "bin/_etcdbackup.tpl" . | include "helm-toolkit.utils.template" | indent 4 }}
{{- end }}

View File

@ -0,0 +1,124 @@
{{/*
Copyright 2017 AT&T Intellectual Property. All other rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/}}
{{- if .Values.manifests.cron_etcd_backup }}
{{- $envAll := . }}
{{- $serviceAccountName := "etcd-backup" }}
{{- $applicationName := "etcd-backup" }}
---
# Role allowing read-only access (get, list) to secrets. It is pinned to the
# release namespace so that it always lives next to the ServiceAccount below,
# including when the rendered manifests are applied outside of Helm.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: Role
metadata:
  name: {{ $serviceAccountName }}
  namespace: {{ $envAll.Release.Namespace }}
rules:
  - apiGroups:
      - ""
    resources:
      - secrets
    verbs:
      - get
      - list
---
# Identity the backup CronJob pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  labels:
    component: etcd-backup
  name: {{ $serviceAccountName }}
  namespace: {{ $envAll.Release.Namespace }}
---
# Binds the Role above to the ServiceAccount above; a RoleBinding can only
# reference a Role in its own namespace, hence the explicit namespace.
apiVersion: rbac.authorization.k8s.io/v1beta1
kind: RoleBinding
metadata:
  name: {{ $serviceAccountName }}
  namespace: {{ $envAll.Release.Namespace }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: {{ $serviceAccountName }}
subjects:
  - kind: ServiceAccount
    name: {{ $serviceAccountName }}
    namespace: {{ $envAll.Release.Namespace }}
---
# CronJob that runs /tmp/bin/etcdbackup (shipped through the -bin ConfigMap)
# on the configured schedule and writes the result to a hostPath volume.
apiVersion: batch/v1beta1
kind: CronJob
metadata:
  name: etcd-backup
spec:
  schedule: {{ .Values.jobs.etcd_backup.cron | quote }}
  successfulJobsHistoryLimit: {{ .Values.jobs.etcd_backup.history.success }}
  failedJobsHistoryLimit: {{ .Values.jobs.etcd_backup.history.failed }}
  # Do not start a new run while the previous one is still in progress.
  concurrencyPolicy: Forbid
  jobTemplate:
    metadata:
      labels:
{{ tuple $envAll $applicationName "etcd-anchor" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
    spec:
      template:
        spec:
          serviceAccountName: {{ $serviceAccountName }}
          restartPolicy: OnFailure
          nodeSelector:
            # Label values must be strings; quoting keeps values such as
            # true or 1 from being parsed as a boolean or a number.
            {{ .Values.labels.anchor.node_selector_key }}: {{ .Values.labels.anchor.node_selector_value | quote }}
          containers:
            - name: etcd-backup
              image: {{ .Values.images.tags.etcdctl }}
              imagePullPolicy: {{ .Values.images.pull_policy }}
{{ tuple $envAll $envAll.Values.pod.resources.jobs.etcd_backup | include "helm-toolkit.snippets.kubernetes_resources" | indent 14 }}
              env:
                - name: ETCDCTL_API
                  value: '3'
                - name: ETCDCTL_DIAL_TIMEOUT
                  # Env values must be strings; quote so a bare number is
                  # not rendered as an integer.
                  value: {{ .Values.backup.etcdctl_dial_timeout | quote }}
                - name: ETCDCTL_ENDPOINTS
                  value: https://{{ .Values.anchor.etcdctl_endpoint }}:{{ .Values.network.service_client.port }}
                - name: ETCDCTL_CACERT
                  value: /etc/etcd/tls/certs/client-ca.pem
                - name: ETCDCTL_CERT
                  value: /etc/etcd/tls/certs/anchor-etcd-client.pem
                - name: ETCDCTL_KEY
                  value: /etc/etcd/tls/keys/anchor-etcd-client-key.pem
                # POD_IP has to be defined before the entries that expand
                # $(POD_IP); without it the reference is left unexpanded.
                - name: POD_IP
                  valueFrom:
                    fieldRef:
                      fieldPath: status.podIP
                - name: CLIENT_ENDPOINT
                  value: https://$(POD_IP):{{ .Values.network.service_client.target_port }}
                - name: PEER_ENDPOINT
                  value: https://$(POD_IP):{{ .Values.network.service_peer.target_port }}
              command:
                - /tmp/bin/etcdbackup
              volumeMounts:
                - name: {{ .Values.service.name }}-bin
                  mountPath: /tmp/bin
                - name: {{ .Values.service.name }}-certs
                  mountPath: /etc/etcd/tls/certs
                - name: {{ .Values.service.name }}-keys
                  mountPath: /etc/etcd/tls/keys
                # Must match BACKUP_DIR in the etcdbackup script.
                - name: etcd-backup
                  mountPath: /var/lib/etcd/backup
          volumes:
            - name: {{ .Values.service.name }}-bin
              configMap:
                name: {{ .Values.service.name }}-bin
                defaultMode: 0555
            - name: {{ .Values.service.name }}-certs
              configMap:
                name: {{ .Values.service.name }}-certs
                defaultMode: 0444
            # Backups are stored on the node itself.
            - name: etcd-backup
              hostPath:
                path: {{ .Values.backup.host_backup_path }}
            - name: {{ .Values.service.name }}-keys
              secret:
                secretName: {{ .Values.service.name }}-keys
                defaultMode: 0444
{{- end }}

View File

@ -39,6 +39,12 @@ etcd:
host_data_path: /var/lib/etcd/example
cleanup_data: true
# Settings consumed by the etcd backup script and CronJob.
backup:
  # Dial timeout handed to etcdctl through ETCDCTL_DIAL_TIMEOUT.
  etcdctl_dial_timeout: '15s'
  # Number of entries retained in the backup directory.
  no_backup_keep: 10
  # File that receives the output of `etcdctl snapshot save`.
  backup_log_file: '/var/log/etcd-backup.log'
  # Node directory mounted into the job as the backup volume.
  host_backup_path: '/var/lib/etcd/backup'
network:
service_client:
name: service_client
@ -83,6 +89,11 @@ nodes:
cert: placeholder
key: placeholder
# Static dependency declaration for the etcd backup job.
# NOTE(review): the cron job template added alongside this value does not read
# it; confirm whether another template consumes it.
dependencies:
  static:
    etcd_backup:
      jobs:
        - etcd_backup_job
pod:
mounts:
daemonset_anchor:
@ -123,6 +134,21 @@ pod:
requests:
memory: "128Mi"
cpu: "100m"
# Resource requests/limits for job pods. The key must be `etcd_backup`: the
# CronJob template looks up pod.resources.jobs.etcd_backup, so the previous
# spelling `etcdbackup` was never found.
jobs:
  etcd_backup:
    limits:
      memory: "128Mi"
      cpu: "100m"
    requests:
      memory: "128Mi"
      cpu: "100m"
# Schedule and job-history retention for the etcd-backup CronJob.
jobs:
  etcd_backup:
    # Five-field cron expression: minute 0 of every 12th hour.
    cron: '0 */12 * * *'
    # How many finished jobs are kept, per outcome.
    history:
      failed: 1
      success: 3
manifests:
configmap_bin: true
@ -132,3 +158,4 @@ manifests:
secret: true
service: true
test_etcd_health: true
cron_etcd_backup: true