From 15658a088e1ffe17460174c76e2676f7176d3f24 Mon Sep 17 00:00:00 2001 From: Mark Burnett Date: Fri, 15 Jun 2018 12:22:24 -0500 Subject: [PATCH] Use Kubernetes lease endpoint reconciler In the resiliency gate: * Enable the --endpoint-reconciler-type=lease option for the apiserver. * Extract etcd validation into its own stages. * Test joining a node while one control plane node is down. Change-Id: Id89b0816e91ab6427c5e2f4833ad4ec4e1e3d133 Depends-On: I2150d40e917567a4072a1565c1b96089f3d6fd2b --- examples/basic/Genesis.yaml | 7 ++++ examples/basic/armada-resources.yaml | 6 ++++ tools/g2/lib/validate.sh | 2 +- tools/g2/manifests/one.json | 22 +++++++++++++ tools/g2/manifests/resiliency.json | 49 +++++++++++++++++++++++++--- tools/g2/stages/check-etcd-health.sh | 38 +++++++++++++++++++++ tools/g2/stages/join-nodes.sh | 14 +------- tools/g2/stages/power-down-node.sh | 31 ++++++++++++++++++ tools/g2/stages/power-up-node.sh | 24 ++++++++++++++ tools/g2/stages/teardown-nodes.sh | 11 +------ 10 files changed, 176 insertions(+), 28 deletions(-) create mode 100644 tools/g2/manifests/one.json create mode 100755 tools/g2/stages/check-etcd-health.sh create mode 100755 tools/g2/stages/power-down-node.sh create mode 100755 tools/g2/stages/power-up-node.sh diff --git a/examples/basic/Genesis.yaml b/examples/basic/Genesis.yaml index bf57d352..0047bd44 100644 --- a/examples/basic/Genesis.yaml +++ b/examples/basic/Genesis.yaml @@ -9,6 +9,13 @@ metadata: data: hostname: n0 ip: 192.168.77.10 + apiserver: + command_prefix: + - /apiserver + - --authorization-mode=Node,RBAC + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,ResourceQuota,DefaultTolerationSeconds + - --service-cluster-ip-range=10.96.0.0/16 + - --endpoint-reconciler-type=lease armada: target_manifest: cluster-bootstrap labels: diff --git a/examples/basic/armada-resources.yaml b/examples/basic/armada-resources.yaml index 6d487b50..17c0d49d 100644 --- 
a/examples/basic/armada-resources.yaml +++ b/examples/basic/armada-resources.yaml @@ -716,6 +716,12 @@ data: upgrade: no_hooks: true values: + command_prefix: + - /apiserver + - --authorization-mode=Node,RBAC + - --admission-control=NamespaceLifecycle,LimitRanger,ServiceAccount,PersistentVolumeLabel,DefaultStorageClass,ResourceQuota,DefaultTolerationSeconds + - --service-cluster-ip-range=10.96.0.0/16 + - --endpoint-reconciler-type=lease apiserver: etcd: endpoints: https://127.0.0.1:2378 diff --git a/tools/g2/lib/validate.sh b/tools/g2/lib/validate.sh index 8991b0fe..721defbf 100644 --- a/tools/g2/lib/validate.sh +++ b/tools/g2/lib/validate.sh @@ -14,7 +14,7 @@ validate_etcd_membership() { # NOTE(mark-burnett): Wait a moment for disks in test environment to settle. sleep 10 - log Validating "${CLUSTER}" etcd membership via "${VM}" + log Validating "${CLUSTER}" etcd membership via "${VM}" for members: "${EXPECTED_MEMBERS[@]}" FOUND_MEMBERS=$(etcdctl_member_list "${CLUSTER}" "${VM}" | tr '\n' ' ' | sed 's/ $//') if [[ "x${EXPECTED_MEMBERS}" != "x${FOUND_MEMBERS}" ]]; then diff --git a/tools/g2/manifests/one.json b/tools/g2/manifests/one.json new file mode 100644 index 00000000..cf36f7c8 --- /dev/null +++ b/tools/g2/manifests/one.json @@ -0,0 +1,22 @@ +{ + "configuration": [ + "promenade/schemas" + ], + "stages": [ + { + "name": "Gate Setup", + "script": "gate-setup.sh" + }, + { + "name": "Create VMs", + "script": "create-vms.sh" + } + ], + "vm": { + "memory": 20480, + "names": [ + "n0" + ], + "vcpus": 4 + } +} diff --git a/tools/g2/manifests/resiliency.json b/tools/g2/manifests/resiliency.json index 9a129292..6f12fd16 100644 --- a/tools/g2/manifests/resiliency.json +++ b/tools/g2/manifests/resiliency.json @@ -48,7 +48,14 @@ "-l", "kubernetes-controller-manager=enabled", "-l", "kubernetes-etcd=enabled", "-l", "kubernetes-scheduler=enabled", - "-l", "ucp-control-plane=enabled", + "-l", "ucp-control-plane=enabled" + ] + }, + { + "name": "Check initial etcd cluster", + 
"script": "check-etcd-health.sh", + "arguments": [ + "-w", "10", "-e", "kubernetes n0 n0 n1 n2", "-e", "calico n0 n0 n1 n2" ] @@ -61,6 +68,14 @@ "-n", "n3" ] }, + { + "name": "Power off n2", + "script": "power-down-node.sh", + "arguments": [ + "-s", + "-n", "n2" + ] + }, { "name": "Update Generated Certs", "script": "generate-certificates.sh", @@ -80,7 +95,21 @@ "-l", "kubernetes-controller-manager=enabled", "-l", "kubernetes-etcd=enabled", "-l", "kubernetes-scheduler=enabled", - "-l", "ucp-control-plane=enabled", + "-l", "ucp-control-plane=enabled" + ] + }, + { + "name": "Power up n2", + "script": "power-up-node.sh", + "arguments": [ + "-n", "n2" + ] + }, + { + "name": "Check full etcd cluster", + "script": "check-etcd-health.sh", + "arguments": [ + "-w", "30", "-e", "kubernetes n0 n0 n1 n2 n3", "-e", "calico n0 n0 n1 n2 n3" ] @@ -91,7 +120,13 @@ "arguments": [ "-v", "n1", "-n", "n0", - "-r", + "-r" + ] + }, + { + "name": "Check post-teardown etcd cluster", + "script": "check-etcd-health.sh", + "arguments": [ "-e", "kubernetes n1 n1 n2 n3", "-e", "calico n1 n1 n2 n3" ] @@ -102,7 +137,13 @@ "arguments": [ "-v", "n1", "-n", "n0", - "-l", "ucp-control-plane=enabled", + "-l", "ucp-control-plane=enabled" + ] + }, + { + "name": "Check final etcd cluster", + "script": "check-etcd-health.sh", + "arguments": [ "-e", "kubernetes n1 n1 n2 n3", "-e", "calico n1 n1 n2 n3" ] diff --git a/tools/g2/stages/check-etcd-health.sh b/tools/g2/stages/check-etcd-health.sh new file mode 100755 index 00000000..3b16116c --- /dev/null +++ b/tools/g2/stages/check-etcd-health.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash + +set -eu + +source "${GATE_UTILS}" + +declare -a ETCD_CLUSTERS + +WAIT_BEFORE_CHECK=0 + +while getopts "e:w:" opt; do + case "${opt}" in + e) + ETCD_CLUSTERS+=("${OPTARG}") + ;; + w) + WAIT_BEFORE_CHECK="${OPTARG}" + ;; + *) + echo "Unknown option" + exit 1 + ;; + esac +done +shift $((OPTIND-1)) + +if [ $# -gt 0 ]; then + echo "Unknown arguments specified: ${*}" + exit 1 +fi + 
+log Waiting "${WAIT_BEFORE_CHECK}" seconds before checking cluster health. +sleep "${WAIT_BEFORE_CHECK}" + +for etcd_validation_string in "${ETCD_CLUSTERS[@]}"; do + IFS=' ' read -a etcd_validation_args <<<"${etcd_validation_string}" + validate_etcd_membership "${etcd_validation_args[@]}" +done diff --git a/tools/g2/stages/join-nodes.sh b/tools/g2/stages/join-nodes.sh index 55d73365..4a888cec 100755 --- a/tools/g2/stages/join-nodes.sh +++ b/tools/g2/stages/join-nodes.sh @@ -4,7 +4,6 @@ set -eu source "${GATE_UTILS}" -declare -a ETCD_CLUSTERS declare -a LABELS declare -a NODES @@ -12,11 +11,8 @@ GET_KEYSTONE_TOKEN=0 USE_DECKHAND=0 DECKHAND_REVISION='' -while getopts "d:e:l:n:tv:" opt; do +while getopts "d:l:n:tv:" opt; do case "${opt}" in - e) - ETCD_CLUSTERS+=("${OPTARG}") - ;; d) USE_DECKHAND=1 DECKHAND_REVISION=${OPTARG} @@ -48,7 +44,6 @@ fi SCRIPT_DIR="${TEMP_DIR}/curled-scripts" -echo Etcd Clusters: "${ETCD_CLUSTERS[@]}" echo Labels: "${LABELS[@]}" echo Nodes: "${NODES[@]}" @@ -86,10 +81,3 @@ for NAME in "${NODES[@]}"; do rsync_cmd "${SCRIPT_DIR}/join-${NAME}.sh" "${NAME}:/root/promenade/" ssh_cmd "${NAME}" "/root/promenade/join-${NAME}.sh" 2>&1 | tee -a "${LOG_FILE}" done - -sleep 10 - -for etcd_validation_string in "${ETCD_CLUSTERS[@]}"; do - IFS=' ' read -a etcd_validation_args <<<"${etcd_validation_string}" - validate_etcd_membership "${etcd_validation_args[@]}" -done diff --git a/tools/g2/stages/power-down-node.sh b/tools/g2/stages/power-down-node.sh new file mode 100755 index 00000000..bb7bcd03 --- /dev/null +++ b/tools/g2/stages/power-down-node.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -eu + +source "${GATE_UTILS}" + +declare -a NODES +SYNC_BEFORE_STOP=0 + +while getopts "n:s" opt; do + case "${opt}" in + n) + NODES+=("${OPTARG}") + ;; + s) + SYNC_BEFORE_STOP=1 + ;; + *) + echo "Unknown option" + exit 1 + ;; + esac +done +shift $((OPTIND-1)) + +for node in "${NODES[@]}"; do + if [[ $SYNC_BEFORE_STOP == 1 ]]; then + ssh_cmd "${node}" sync + fi + 
vm_stop "${node}" +done diff --git a/tools/g2/stages/power-up-node.sh b/tools/g2/stages/power-up-node.sh new file mode 100755 index 00000000..419319c9 --- /dev/null +++ b/tools/g2/stages/power-up-node.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -eu + +source "${GATE_UTILS}" + +declare -a NODES + +while getopts "n:s" opt; do + case "${opt}" in + n) + NODES+=("${OPTARG}") + ;; + *) + echo "Unknown option" + exit 1 + ;; + esac +done +shift $((OPTIND-1)) + +for node in "${NODES[@]}"; do + vm_start "${node}" +done diff --git a/tools/g2/stages/teardown-nodes.sh b/tools/g2/stages/teardown-nodes.sh index 363d7925..3123fcdd 100755 --- a/tools/g2/stages/teardown-nodes.sh +++ b/tools/g2/stages/teardown-nodes.sh @@ -4,16 +4,12 @@ set -eu source "${GATE_UTILS}" -declare -a ETCD_CLUSTERS declare -a NODES RECREATE=0 -while getopts "e:n:rv:" opt; do +while getopts "n:rv:" opt; do case "${opt}" in - e) - ETCD_CLUSTERS+=("${OPTARG}") - ;; n) NODES+=("${OPTARG}") ;; @@ -44,8 +40,3 @@ for NAME in "${NODES[@]}"; do vm_create "${NAME}" fi done - -for etcd_validation_string in "${ETCD_CLUSTERS[@]}"; do - IFS=' ' read -a etcd_validation_args <<<"${etcd_validation_string}" - validate_etcd_membership "${etcd_validation_args[@]}" -done