Updated resiliency gate
Updated resiliency gate script to consistently pass all gate stages, using ubuntu bionic image for node deployment. - Updated developer-onboarding.rst with information on how to configure and run the resiliency gate behind a corporate proxy. - Updated the gate scripts to use the proxy configuration. - Updated up.sh to pull the hyperkube image as cache, to speed up and stabilize the initial kubelet deployment of kubernetes cluster services. - Updated and added sleeps and retries in some gate stages and scripts to avoid gate failures due to transient environment issues. - Updated the ubuntu base image for node deployments from xenial to bionic base image. - Added code in teardown-nodes stage to manually remove the etcd members: kubernetes and calico, since they still remain listed as etcd members on the genesis node, even after genesis is torn down. Change-Id: Ia11d66ab30ac7a07626d4f1d02a6da48155f862d
This commit is contained in:
parent
cb4ae15eb1
commit
9f42b502f7
|
@ -43,6 +43,25 @@ debug it, e.g.:
|
||||||
|
|
||||||
./tools/g2/bin/ssh.sh n0
|
./tools/g2/bin/ssh.sh n0
|
||||||
|
|
||||||
|
Running Resiliency Tests Behind Corporate Proxy
|
||||||
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
If your development environment is behind a corporate proxy, you will need to
|
||||||
|
update the following files to add your environment's proxy information, dns, or
|
||||||
|
possibly your internal ntp servers, in order to deploy airship:
|
||||||
|
|
||||||
|
* `charts/coredns/values.yaml`: Update the upstream coredns nameserver IPs
|
||||||
|
to your internal DNS addresses.
|
||||||
|
* `examples/basic/KubernetesNetwork.yaml`: Since the resiliency manifest uses
|
||||||
|
the examples/basic environment configuration, you will need to update
|
||||||
|
the kubernetes network configuration in this folder. Update the upstream
|
||||||
|
nameserver IPs to your internal DNS addresses. Add the http(s) proxy URL
|
||||||
|
and additional_no_proxy list. Also, if your environment requires that,
|
||||||
|
update the ntp server list to your internal ntp server addresses for
|
||||||
|
more reliable time sync.
|
||||||
|
* `tools/g2/templates/network-config.sub`: Update the upstream nameserver
|
||||||
|
IPs to your internal DNS addresses.
|
||||||
|
|
||||||
Bootstrapping
|
Bootstrapping
|
||||||
-------------
|
-------------
|
||||||
|
|
||||||
|
|
|
@ -74,6 +74,14 @@ export http_proxy={{ config['KubernetesNetwork:proxy.url'] | default('', true) }
|
||||||
export https_proxy={{ config['KubernetesNetwork:proxy.url'] | default('', true) }}
|
export https_proxy={{ config['KubernetesNetwork:proxy.url'] | default('', true) }}
|
||||||
export no_proxy={{ config.get(kind='KubernetesNetwork') | fill_no_proxy }}
|
export no_proxy={{ config.get(kind='KubernetesNetwork') | fill_no_proxy }}
|
||||||
|
|
||||||
|
# Configure apt proxy
|
||||||
|
if [[ -n "${http_proxy}" ]]; then
|
||||||
|
log "Configuring Apt Proxy"
|
||||||
|
cat << EOF | sudo tee /etc/apt/apt.conf.d/50proxyconf
|
||||||
|
Acquire::https::proxy "${https_proxy}";
|
||||||
|
Acquire::http::proxy "${http_proxy}";
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
|
||||||
# Install system packages
|
# Install system packages
|
||||||
#
|
#
|
||||||
|
@ -139,5 +147,13 @@ fi
|
||||||
if systemctl -q is-enabled containerd > /dev/null 2>&1; then
|
if systemctl -q is-enabled containerd > /dev/null 2>&1; then
|
||||||
systemctl restart containerd || true
|
systemctl restart containerd || true
|
||||||
fi
|
fi
|
||||||
|
# Pull the hyperkube image prior to restarting kubelet, this is
|
||||||
|
# needed for more reliable image pull in an environment with slow
|
||||||
|
# network connectivity to avoid image pull timeouts and retries by
|
||||||
|
# kubelet.
|
||||||
|
# The || true is added to let the deployment continue, evenif the
|
||||||
|
# $IMAGE_HYPERKUBE is not defined in the environment, and the image
|
||||||
|
# pull doesn't happen.
|
||||||
|
docker image pull "${IMAGE_HYPERKUBE}" || true
|
||||||
systemctl enable kubelet
|
systemctl enable kubelet
|
||||||
systemctl restart kubelet
|
systemctl restart kubelet
|
||||||
|
|
|
@ -222,7 +222,7 @@ function validate_kubectl_logs {
|
||||||
NAMESPACE=default
|
NAMESPACE=default
|
||||||
POD_NAME=log-test-${NODE}-$(date +%s)
|
POD_NAME=log-test-${NODE}-$(date +%s)
|
||||||
|
|
||||||
cat <<EOPOD | kubectl --namespace $NAMESPACE apply -f -
|
cat <<EOPOD | kubectl --namespace $NAMESPACE --timeout 100s apply -f -
|
||||||
---
|
---
|
||||||
apiVersion: v1
|
apiVersion: v1
|
||||||
kind: Pod
|
kind: Pod
|
||||||
|
@ -244,6 +244,7 @@ EOPOD
|
||||||
|
|
||||||
wait_for_node_ready $NODE 300
|
wait_for_node_ready $NODE 300
|
||||||
wait_for_pod_termination $NAMESPACE $POD_NAME
|
wait_for_pod_termination $NAMESPACE $POD_NAME
|
||||||
|
sleep 5
|
||||||
ACTUAL_LOGS=$(kubectl --namespace $NAMESPACE logs $POD_NAME)
|
ACTUAL_LOGS=$(kubectl --namespace $NAMESPACE logs $POD_NAME)
|
||||||
if [ "x$ACTUAL_LOGS" != "xEXPECTED RESULT" ]; then
|
if [ "x$ACTUAL_LOGS" != "xEXPECTED RESULT" ]; then
|
||||||
log Got unexpected logs:
|
log Got unexpected logs:
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
export TEMP_DIR=${TEMP_DIR:-$(mktemp -d)}
|
export TEMP_DIR=${TEMP_DIR:-$(mktemp -d)}
|
||||||
export BASE_IMAGE_SIZE=${BASE_IMAGE_SIZE:-68719476736}
|
export BASE_IMAGE_SIZE=${BASE_IMAGE_SIZE:-344784896}
|
||||||
export BASE_IMAGE_URL=${BASE_IMAGE_URL:-https://cloud-images.ubuntu.com/releases/16.04/release/ubuntu-16.04-server-cloudimg-amd64-disk1.img}
|
export BASE_IMAGE_URL=${BASE_IMAGE_URL:-https://cloud-images.ubuntu.com/releases/bionic/release/ubuntu-18.04-server-cloudimg-amd64.img}
|
||||||
export IMAGE_PROMENADE=${IMAGE_PROMENADE:-quay.io/airshipit/promenade:master}
|
export IMAGE_PROMENADE=${IMAGE_PROMENADE:-quay.io/airshipit/promenade:master}
|
||||||
export IMAGE_PROMENADE_DISTRO=${IMAGE_PROMENADE_DISTRO:-ubuntu_bionic}
|
export IMAGE_PROMENADE_DISTRO=${IMAGE_PROMENADE_DISTRO:-ubuntu_bionic}
|
||||||
export IMAGE_HYPERKUBE=${IMAGE_HYPERKUBE:-gcr.io/google_containers/hyperkube-amd64:v1.17.3}
|
export IMAGE_HYPERKUBE=${IMAGE_HYPERKUBE:-gcr.io/google_containers/hyperkube-amd64:v1.17.3}
|
||||||
|
|
|
@ -14,3 +14,17 @@ etcdctl_member_list() {
|
||||||
|
|
||||||
etcdctl_cmd "${CLUSTER}" "${VM}" member list -w json | jq -r '.members[].name' | sort
|
etcdctl_cmd "${CLUSTER}" "${VM}" member list -w json | jq -r '.members[].name' | sort
|
||||||
}
|
}
|
||||||
|
|
||||||
|
etcdctl_member_remove() {
|
||||||
|
CLUSTER=${1}
|
||||||
|
VM=${2}
|
||||||
|
NODE=${3}
|
||||||
|
shift 3
|
||||||
|
|
||||||
|
MEMBER_ID=$(etcdctl_cmd $CLUSTER ${VM} member list | awk -F', ' "/${NODE}/ "'{ print $1}')
|
||||||
|
if [[ -n $MEMBER_ID ]] ; then
|
||||||
|
etcdctl_cmd "${CLUSTER}" "${VM}" member remove "$MEMBER_ID"
|
||||||
|
else
|
||||||
|
log No members found in cluster "$CLUSTER" for node "$NODE"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
|
|
@ -13,13 +13,24 @@ validate_etcd_membership() {
|
||||||
EXPECTED_MEMBERS="${*}"
|
EXPECTED_MEMBERS="${*}"
|
||||||
|
|
||||||
# NOTE(mark-burnett): Wait a moment for disks in test environment to settle.
|
# NOTE(mark-burnett): Wait a moment for disks in test environment to settle.
|
||||||
sleep 10
|
sleep 60
|
||||||
log Validating "${CLUSTER}" etcd membership via "${VM}" for members: "${EXPECTED_MEMBERS[@]}"
|
log Validating "${CLUSTER}" etcd membership via "${VM}" for members: "${EXPECTED_MEMBERS[@]}"
|
||||||
FOUND_MEMBERS=$(etcdctl_member_list "${CLUSTER}" "${VM}" | tr '\n' ' ' | sed 's/ $//')
|
|
||||||
|
|
||||||
if [[ "x${EXPECTED_MEMBERS}" != "x${FOUND_MEMBERS}" ]]; then
|
local retries=25
|
||||||
log Etcd membership check failed for cluster "${CLUSTER}"
|
for ((n=0;n<=$retries;n++)); do
|
||||||
|
FOUND_MEMBERS=$(etcdctl_member_list "${CLUSTER}" "${VM}" | tr '\n' ' ' | sed 's/ $//')
|
||||||
|
|
||||||
log "Found \"${FOUND_MEMBERS}\", expected \"${EXPECTED_MEMBERS}\""
|
log "Found \"${FOUND_MEMBERS}\", expected \"${EXPECTED_MEMBERS}\""
|
||||||
exit 1
|
if [[ "x${EXPECTED_MEMBERS}" != "x${FOUND_MEMBERS}" ]]; then
|
||||||
fi
|
log Etcd membership check failed for cluster "${CLUSTER}" on attempt "$n".
|
||||||
|
if [[ "$n" == "$retries" ]]; then
|
||||||
|
log Etcd membership check failed for cluster "${CLUSTER}" after "$n" retries. Exiting.
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
sleep 30
|
||||||
|
else
|
||||||
|
log Etcd membership check succeeded for cluster "${CLUSTER}" on attempt "${n}"
|
||||||
|
break
|
||||||
|
fi
|
||||||
|
done
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,6 +120,8 @@
|
||||||
"name": "Teardown Genesis",
|
"name": "Teardown Genesis",
|
||||||
"script": "teardown-nodes.sh",
|
"script": "teardown-nodes.sh",
|
||||||
"arguments": [
|
"arguments": [
|
||||||
|
"-e", "kubernetes",
|
||||||
|
"-e", "calico",
|
||||||
"-v", "n1",
|
"-v", "n1",
|
||||||
"-n", "n0",
|
"-n", "n0",
|
||||||
"-r"
|
"-r"
|
||||||
|
@ -160,7 +162,7 @@
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"vm": {
|
"vm": {
|
||||||
"memory": 3072,
|
"memory": 4096,
|
||||||
"names": [
|
"names": [
|
||||||
"n0",
|
"n0",
|
||||||
"n1",
|
"n1",
|
||||||
|
|
|
@ -7,7 +7,9 @@ source "${GATE_UTILS}"
|
||||||
rsync_cmd "${TEMP_DIR}/scripts"/*genesis* "${GENESIS_NAME}:/root/promenade/"
|
rsync_cmd "${TEMP_DIR}/scripts"/*genesis* "${GENESIS_NAME}:/root/promenade/"
|
||||||
|
|
||||||
set -o pipefail
|
set -o pipefail
|
||||||
ssh_cmd "${GENESIS_NAME}" env "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY}" /root/promenade/genesis.sh 2>&1 | tee -a "${LOG_FILE}"
|
ssh_cmd "${GENESIS_NAME}" env "IMAGE_HYPERKUBE=${IMAGE_HYPERKUBE}" \
|
||||||
|
env "PROMENADE_ENCRYPTION_KEY=${PROMENADE_ENCRYPTION_KEY}" \
|
||||||
|
/root/promenade/genesis.sh 2>&1 | tee -a "${LOG_FILE}"
|
||||||
ssh_cmd "${GENESIS_NAME}" /root/promenade/validate-genesis.sh 2>&1 | tee -a "${LOG_FILE}"
|
ssh_cmd "${GENESIS_NAME}" /root/promenade/validate-genesis.sh 2>&1 | tee -a "${LOG_FILE}"
|
||||||
set +o pipefail
|
set +o pipefail
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@ mkdir -p "${SCRIPT_DIR}"
|
||||||
for NAME in "${NODES[@]}"; do
|
for NAME in "${NODES[@]}"; do
|
||||||
log Building join script for node "${NAME}"
|
log Building join script for node "${NAME}"
|
||||||
|
|
||||||
CURL_ARGS=("--fail" "--max-time" "300" "--retry" "16" "--retry-delay" "15")
|
CURL_ARGS=("-v" "--max-time" "600" "--retry" "20" "--retry-delay" "15" "--connect-timeout" "30" "--progress-bar")
|
||||||
if [[ $GET_KEYSTONE_TOKEN == 1 ]]; then
|
if [[ $GET_KEYSTONE_TOKEN == 1 ]]; then
|
||||||
TOKEN="$(os_ks_get_token "${VIA}")"
|
TOKEN="$(os_ks_get_token "${VIA}")"
|
||||||
if [[ -z $TOKEN ]]; then
|
if [[ -z $TOKEN ]]; then
|
||||||
|
@ -67,7 +67,7 @@ for NAME in "${NODES[@]}"; do
|
||||||
promenade_health_check "${VIA}"
|
promenade_health_check "${VIA}"
|
||||||
|
|
||||||
log "Validating documents"
|
log "Validating documents"
|
||||||
ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X POST -H "Content-Type: application/json" -d "$(promenade_render_validate_body "${USE_DECKHAND}" "${DECKHAND_REVISION}")" "$(promenade_render_validate_url)"
|
ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X POST -H "Content-Type: application/json" -d "$(promenade_render_validate_body "${USE_DECKHAND}" "${DECKHAND_REVISION}")" "$(promenade_render_validate_url)"
|
||||||
|
|
||||||
JOIN_CURL_URL="$(promenade_render_curl_url "${NAME}" "${USE_DECKHAND}" "${DECKHAND_REVISION}" "${LABELS[@]}")"
|
JOIN_CURL_URL="$(promenade_render_curl_url "${NAME}" "${USE_DECKHAND}" "${DECKHAND_REVISION}" "${LABELS[@]}")"
|
||||||
log "Fetching join script via: ${JOIN_CURL_URL}"
|
log "Fetching join script via: ${JOIN_CURL_URL}"
|
||||||
|
|
|
@ -6,15 +6,15 @@ source "${GATE_UTILS}"
|
||||||
|
|
||||||
VIA="n1"
|
VIA="n1"
|
||||||
|
|
||||||
CURL_ARGS=("--fail" "--max-time" "300" "--retry" "16" "--retry-delay" "15")
|
CURL_ARGS=("-v" "--max-time" "600" "--retry" "20" "--retry-delay" "15" "--connect-timeout" "30" "--progress-bar")
|
||||||
|
|
||||||
log Adding labels to node n0
|
log "Adding labels to node n0"
|
||||||
JSON="{\"calico-etcd\": \"enabled\", \"coredns\": \"enabled\", \"kubernetes-apiserver\": \"enabled\", \"kubernetes-controller-manager\": \"enabled\", \"kubernetes-etcd\": \"enabled\", \"kubernetes-scheduler\": \"enabled\", \"ucp-control-plane\": \"enabled\"}"
|
JSON="{\"calico-etcd\": \"enabled\", \"coredns\": \"enabled\", \"kubernetes-apiserver\": \"enabled\", \"kubernetes-controller-manager\": \"enabled\", \"kubernetes-etcd\": \"enabled\", \"kubernetes-scheduler\": \"enabled\", \"ucp-control-plane\": \"enabled\"}"
|
||||||
|
|
||||||
ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n0)"
|
ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n0)"
|
||||||
|
|
||||||
# Need to wait
|
# Need to wait
|
||||||
sleep 60
|
sleep 120
|
||||||
|
|
||||||
validate_etcd_membership kubernetes n1 n0 n1 n2 n3
|
validate_etcd_membership kubernetes n1 n0 n1 n2 n3
|
||||||
validate_etcd_membership calico n1 n0 n1 n2 n3
|
validate_etcd_membership calico n1 n0 n1 n2 n3
|
||||||
|
@ -22,10 +22,10 @@ validate_etcd_membership calico n1 n0 n1 n2 n3
|
||||||
log Removing labels from node n2
|
log Removing labels from node n2
|
||||||
JSON="{\"coredns\": \"enabled\", \"ucp-control-plane\": \"enabled\"}"
|
JSON="{\"coredns\": \"enabled\", \"ucp-control-plane\": \"enabled\"}"
|
||||||
|
|
||||||
ssh_cmd "${VIA}" curl -v "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n2)"
|
ssh_cmd "${VIA}" curl "${CURL_ARGS[@]}" -X PUT -H "Content-Type: application/json" -d "${JSON}" "$(promenade_put_labels_url n2)"
|
||||||
|
|
||||||
# Need to wait
|
# Need to wait
|
||||||
sleep 60
|
sleep 120
|
||||||
|
|
||||||
validate_cluster n1
|
validate_cluster n1
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,11 @@ declare -a NODES
|
||||||
|
|
||||||
RECREATE=0
|
RECREATE=0
|
||||||
|
|
||||||
while getopts "n:rv:" opt; do
|
while getopts "e:n:rv:" opt; do
|
||||||
case "${opt}" in
|
case "${opt}" in
|
||||||
|
e)
|
||||||
|
ETCD_CLUSTERS+=("${OPTARG}")
|
||||||
|
;;
|
||||||
n)
|
n)
|
||||||
NODES+=("${OPTARG}")
|
NODES+=("${OPTARG}")
|
||||||
;;
|
;;
|
||||||
|
@ -35,6 +38,9 @@ fi
|
||||||
for NAME in "${NODES[@]}"; do
|
for NAME in "${NODES[@]}"; do
|
||||||
log Tearing down node "${NAME}"
|
log Tearing down node "${NAME}"
|
||||||
promenade_teardown_node "${NAME}" "${VIA}"
|
promenade_teardown_node "${NAME}" "${VIA}"
|
||||||
|
for ETCD_CLUSTER in "${ETCD_CLUSTERS[@]}"; do
|
||||||
|
etcdctl_member_remove "${ETCD_CLUSTER}" "${VIA}" "${NAME}"
|
||||||
|
done
|
||||||
vm_clean "${NAME}"
|
vm_clean "${NAME}"
|
||||||
if [[ ${RECREATE} == "1" ]]; then
|
if [[ ${RECREATE} == "1" ]]; then
|
||||||
vm_create "${NAME}"
|
vm_create "${NAME}"
|
||||||
|
|
Loading…
Reference in New Issue