From c84a5b64de0bf9bcf72b88bdbb81fc13d69a86aa Mon Sep 17 00:00:00 2001 From: "Hussey, Scott (sh8121)" Date: Fri, 7 Jun 2019 08:55:50 -0500 Subject: [PATCH] (fix) Check sync of only active rack controllers - The import resources job waits for rack controllers to sync the imported resources. In an environment where rack pods have been scheduled away from a node, a dead rack controller is left in the MAAS database. We cannot gate on dead controllers syncing, as rackd is no longer running, so we now only look at rack controllers with a running rackd. Change-Id: I5ca16a0c97ed201a08844ca7c82c2cbb7d059aa7 --- .../bin/_import-boot-resources.sh.tpl | 171 +++++++++++------- 1 file changed, 110 insertions(+), 61 deletions(-) diff --git a/charts/maas/templates/bin/_import-boot-resources.sh.tpl b/charts/maas/templates/bin/_import-boot-resources.sh.tpl index d08c2ee..0c73ecd 100644 --- a/charts/maas/templates/bin/_import-boot-resources.sh.tpl +++ b/charts/maas/templates/bin/_import-boot-resources.sh.tpl @@ -22,83 +22,121 @@ TRY_LIMIT=${TRY_LIMIT:-1} JOB_TIMEOUT=${JOB_TIMEOUT:-900} RETRY_TIMER=${RETRY_TIMER:-30} -function start_import { - check_for_download +function timer { + retry_wait=$1 + shift - if [[ $? -eq 0 ]] + while [[ ${JOB_TIMEOUT} -gt 0 ]] + do + "$@" + rc=$? + if [ $rc -eq 0 ] then - echo "Already have images, skipping import." - return 0 + return $rc + else + JOB_TIMEOUT=$(($JOB_TIMEOUT - $retry_wait)) + sleep $retry_wait fi + done - while [[ ${import_tries} -lt $TRY_LIMIT ]] - do - import_tries=$(($import_tries + 1)) - echo "Starting image import try ${import_tries}..." - maas ${ADMIN_USERNAME} boot-resources import - sleep 30 # Seems MAAS needs time to sync up - check_for_download - if [[ $? -eq 0 ]] - then - echo "Image import success!" - return 0 - fi - done - return 1 + return 124 +} + +function import_resources { + check_for_download + rc=$? + + if [ $rc -ne 0 ] + then + echo "Starting image import try ${import_tries}..." 
+ maas ${ADMIN_USERNAME} boot-resources import + sleep 30 + check_for_download + rc=$? + fi + + return $rc +} + +function start_import { + timer "$RETRY_TIMER" import_resources } function check_for_download { - - while [[ ${JOB_TIMEOUT} -gt 0 ]]; do - if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true'; - then - echo -e '\nBoot resources currently importing\n' - let JOB_TIMEOUT-=${RETRY_TIMER} - sleep ${RETRY_TIMER} - else - synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO) - if [[ $synced_imgs -gt 0 ]] - then - echo 'Boot resources have completed importing' - return 0 - else - echo 'Import failed!' - return 1 - fi - fi - done - echo "Timeout waiting for import!" + if maas ${ADMIN_USERNAME} boot-resources is-importing | grep -q 'true'; + then + echo -e '\nBoot resources currently importing\n' return 1 + else + synced_imgs=$(maas ${ADMIN_USERNAME} boot-resources read | tail -n +1 | jq ".[] | select( .type | contains(\"Synced\")) | .name " | grep -c $MAAS_DEFAULT_DISTRO) + if [[ $synced_imgs -gt 0 ]] + then + echo 'Boot resources have completed importing' + return 0 + else + echo 'Import failed!' + return 1 + fi + fi +} + +function check_then_set_single { + option="$1" + value="$2" + + cur_val=$(maas ${ADMIN_USERNAME} maas get-config name=${option} | tail -1 | tr -d '"') + desired_val=$(echo ${value} | tr -d '"') + + if [[ $cur_val != $desired_val ]] + then + echo "Setting MAAS option ${option} to ${desired_val}" + maas ${ADMIN_USERNAME} maas set-config name=${option} value=${desired_val} + return $? 
+ else + echo "MAAS option ${option} already set to ${cur_val}" + return 0 + fi } function check_then_set { option=$1 value=$2 - while [[ ${JOB_TIMEOUT} -gt 0 ]] - do - cur_val=$(maas ${ADMIN_USERNAME} maas get-config name=${option} | tail -1 | tr -d '"') - desired_val=$(echo ${value} | tr -d '"') + timer "$RETRY_TIMER" check_then_set_single "$option" "$value" +} - if [[ $cur_val != $desired_val ]] - then - echo "Setting MAAS option ${option} to ${desired_val}" - maas ${ADMIN_USERNAME} maas set-config name=${option} value=${desired_val} - if [[ $? -gt 0 ]] - then - let JOB_TIMEOUT-=${RETRY_TIMER} - sleep ${RETRY_TIMER} - else - return $? - fi - else - echo "MAAS option ${option} already set to ${cur_val}" - return 0 - fi +# Get rack controllers reporting a healthy rackd +function get_active_rack_controllers { + maas ${ADMIN_USERNAME} rack-controllers read | jq -r 'map({"system_id":.system_id,"service_set":(.service_set[] | select(.name=="rackd"))}) | map(select(.service_set.status == "running")) | .[] | .system_id' +} + +function check_for_rack_sync_single { + sync_list="" + + rack_list=$(get_active_rack_controllers) + for rack_id in ${rack_list} + do + selected_imgs=$(maas ${ADMIN_USERNAME} rack-controller list-boot-images ${rack_id} | tail -n +1 | jq ".images[] | select( .name | contains(\"${MAAS_DEFAULT_DISTRO}\")) | .name") + synced_ctlr=$(maas ${ADMIN_USERNAME} rack-controller list-boot-images ${rack_id} | tail -n +1 | jq '.status == "synced"') + if [[ $synced_ctlr == "true" && ! 
-z ${selected_imgs} ]] + then + sync_list=$(echo -e "${sync_list}\n${rack_id}" | sort | uniq) + else + maas ${ADMIN_USERNAME} rack-controller import-boot-images ${rack_id} + fi + if [[ $(echo -e "${rack_list}" | sort | uniq | grep -v '^$' ) == $(echo -e "${sync_list}" | sort | uniq | grep -v '^$') ]] + then + return 0 + fi done + return 1 } +function check_for_rack_sync { + timer "$RETRY_TIMER" check_for_rack_sync_single +} + function configure_proxy { check_then_set enable_http_proxy ${MAAS_PROXY_ENABLED} check_then_set use_peer_proxy ${MAAS_PEER_PROXY_ENABLED} @@ -117,8 +155,9 @@ function configure_dns { } function configure_images { + check_for_rack_sync - if [[ $? -eq 1 ]] + if [[ $? -eq 124 ]] then echo "Timed out waiting for rack controller sync." return 1 @@ -147,8 +186,17 @@ function configure_boot_sources { fi } -KEY=$(maas-region apikey --username=${ADMIN_USERNAME}) -maas login ${ADMIN_USERNAME} ${MAAS_ENDPOINT} $KEY +function maas_login { + KEY=$(maas-region apikey --username=${ADMIN_USERNAME}) + if [ -z "$KEY" ] + then + return 1 + fi + maas login ${ADMIN_USERNAME} ${MAAS_ENDPOINT} $KEY + return $? +} + +timer "$RETRY_TIMER" maas_login configure_proxy configure_ntp @@ -157,6 +205,7 @@ configure_dns # make call to import images configure_boot_sources start_import + if [[ $? -eq 0 ]] then configure_images