[WIP - ubuntu-uplift] - support for ubuntu 18.04
Change-Id: Idaacd83e729a10f39e124bf3a788583a5d6c5e5b
This commit is contained in:
parent
a3c961efe6
commit
983a3e85f2
|
@ -59,6 +59,26 @@ else:
|
|||
fi
|
||||
}
|
||||
|
||||
#######################################
# Abort handler: print an optional error message and a call backtrace, emit
# the exit marker, then keep writing until stdout is closed.
# Arguments: optional error message (any number of words)
# Outputs:   message, "Backtrace:" + one line per caller frame, and
#            __EXIT_MARKER__, all on stdout
# Returns:   does not return; exits 1 once a write to stdout fails
#######################################
die(){
  # Keep the loop counter out of the global scope.
  local i
  set +x
  # Print any passed error message (skipped when empty or only spaces).
  # NOTE(review): the message is written to stdout, the same stream as the
  # exit marker below; presumably so it stays ordered ahead of the marker.
  # Confirm with the consumer of this output before moving it to stderr.
  if [[ "$*" = *[!\ ]* ]]; then
    echo "$@"
  fi
  echo "Backtrace:"
  for ((i = 0; i < ${#FUNCNAME[@]} - 1; i++)); do
    echo "$(caller "$i")"
  done
  echo __EXIT_MARKER__
  # Exit after pipe closes to ensure all output is flushed first
  while : ; do
    echo "Waiting on exit..." || exit 1
  done
}
|
||||
# Make die available to child bash processes, run it whenever a command
# fails (ERR trap), and trace every command from here on.
export -f die
trap 'die' ERR
set -x
|
||||
|
||||
|
||||
install_file(){
|
||||
local path="$1"
|
||||
|
@ -110,6 +130,8 @@ echo "Using rendered manifests file '$rendered_file'"
|
|||
# env vars which can be set (to any non-empty value) if you want to disable
# the corresponding setup step; each defaults to empty, i.e. step enabled.
: "${DISABLE_SECCOMP_PROFILE:=}"
: "${DISABLE_APPARMOR_PROFILES:=}"
: "${DISABLE_NTPD_SETUP:=}"
: "${DISABLE_DNS_CONFIGURE:=}"
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -171,3 +193,238 @@ if [[ ! $DISABLE_APPARMOR_PROFILES ]]; then
|
|||
systemctl reload apparmor.service
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
# Set DNS servers
###############################################################################

if [[ ! $DISABLE_DNS_CONFIGURE ]]; then
  # Fetch upstream DNS servers
  manifests_lookup "$rendered_file" "pegleg/CommonAddresses/v1" \
                   "common-addresses" "['data']['dns']['upstream_servers']"
  dns_servers="$(echo "$RESULT" | tr '\n' ' ')"
  echo "Upstream DNS server(s): $dns_servers"

  # Fetch domain name (used for dns search)
  manifests_lookup "$rendered_file" "pegleg/CommonAddresses/v1" \
                   "common-addresses" "['data']['dns']['node_domain']"
  domain="$RESULT"
  echo "Domain located: '$domain'"

  # Update node DNS settings
  resolv_conf=/etc/resolv.conf
  # Collect missing entries in a private temp file; it stays empty when
  # nothing needs to be added.
  new_resolv_conf="$(mktemp)"
  for server in $dns_servers; do
    # -F: dots in the address are literal; -w: 10.0.0.1 must not match
    # 10.0.0.10; -q: do not echo matches into the log.
    if ! grep -qwF -- "nameserver $server" "$resolv_conf"; then
      echo "nameserver $server" >> "$new_resolv_conf"
    fi
  done
  if ! grep -qwF -- "domain $domain" "$resolv_conf"; then
    echo "domain $domain" >> "$new_resolv_conf"
  fi
  if ! grep -qwF -- "search $domain" "$resolv_conf"; then
    echo "search $domain" >> "$new_resolv_conf"
  fi
  if [[ -s $new_resolv_conf ]]; then
    # preserve any existing settings, but they go to the end of the list
    cat "$resolv_conf" >> "$new_resolv_conf"
    cp "$new_resolv_conf" "$resolv_conf"
  fi
  rm -f "$new_resolv_conf"

  # nslookup is provided by dnsutils; make sure it is present before the
  # test below relies on it.
  command -v nslookup > /dev/null || apt_install dnsutils

  # perform nslookup test
  nslookup google.com > /dev/null || \
    die "DNS lookup for google.com failed. Check your manifest DNS servers."
fi
|
||||
|
||||
|
||||
###############################################################################
# Install and configure ntpd
###############################################################################

# The min number of NTP servers that should meet the below criteria
# NOTE: ntp expects a quorum of at least 3 servers
: "${NTPD_SERVER_COUNT_THRESHOLD:=3}"

# The maximum allowed stratum
: "${NTPD_SERVER_STRATUM_THRESHOLD:=4}"

# The maximum allowed offset (milliseconds, absolute value)
# NOTE: Ceph requires <= 50ms offset to function properly
: "${NTPD_SERVER_OFFSET_THRESHOLD:=50}"

# The maximum allowed jitter (milliseconds, absolute value)
: "${NTPD_SERVER_JITTER_THRESHOLD:=25}"

# The minimum allowed reach
: "${NTPD_SERVER_REACH_THRESHOLD:=377}"

# The maximum number of seconds to wait for above criteria to be met
# NOTE: it takes 8 polling intervals to go from reach 0 to reach 377, and each
# polling interval is 64 seconds, so timeout should be at least this much.
: "${NTPD_HEALTH_TIMEOUT:=512}"

# For Ubuntu 18.04 LTS: switch off timedatectl-managed NTP, install ntp and
# always restart the service, then give it time to come up.
# Skipped when DISABLE_NTPD_SETUP is set so that opting out of NTP setup
# leaves the host's time configuration untouched.
if [[ ! $DISABLE_NTPD_SETUP ]]; then
  timedatectl set-ntp no
  apt_install ntp
  systemctl restart ntp
  sleep 15
fi
|
||||
|
||||
# Configure the manifest's NTP servers in /etc/ntp.conf and wait until at
# least NTPD_SERVER_COUNT_THRESHOLD of them pass the stratum, reach, jitter
# and offset checks; die otherwise. Skipped when DISABLE_NTPD_SETUP is set.
if [[ ! $DISABLE_NTPD_SETUP ]]; then
  # Check if we are using a known bad kernel version that causes
  # an inability to timesync to NTP servers.
  if uname -a | grep -qF '4.13.0-36-generic'; then
    die "YOU ARE USING A KNOWN BAD KERNEL VERSION, AND MAY HAVE SKIPPED THE KERNEL UPDATE SECTION OF THIS SCRIPT. NTP WILL NOT RUN SUCCESSFULLY UNTIL THIS IS DONE."
  fi

  # ntp itself is installed before this section is reached.
  apt_install bc
  apt_install dnsutils # nslookup

  # Remove Ubuntu pool entries to be consistent with MaaS deployed nodes
  sed -i '/^pool.*ubuntu.*/d' /etc/ntp.conf

  # Fetch NTP server list
  #
  # First try data.ntp.servers (new value in list format), then fall back
  # to data.ntp.servers_joined (comma delimited format) if needed.
  manifests_lookup "$rendered_file" "pegleg/CommonAddresses/v1" \
                   "common-addresses" "['data']['ntp']['servers']" '' true
  ntp_servers="$(echo "$RESULT" | tr '\n' ' ')"
  if [[ $FAIL = true ]]; then
    manifests_lookup "$rendered_file" "pegleg/CommonAddresses/v1" \
                     "common-addresses" "['data']['ntp']['servers_joined']"
    ntp_servers="$(echo "$RESULT" | tr ',' ' ')"
  fi
  echo "Upstream NTP server(s): $ntp_servers"

  # Add any server that is not configured yet; restart only if one was added.
  restart=false
  for ntp_server in $ntp_servers; do
    svr_expr="server ${ntp_server} iburst"
    if ! grep -q "^$svr_expr" /etc/ntp.conf; then
      echo "$svr_expr" >> /etc/ntp.conf
      restart=true
    fi
  done
  if [[ $restart = true ]]; then
    systemctl restart ntp
    # takes some time for assoc IDs to get populated
    sleep 15
  fi

  ntp_server_healthy_count=0
  # time_waited accumulates across every server and every check below, so
  # NTPD_HEALTH_TIMEOUT bounds the total wait, not the wait per server.
  time_waited=0
  sleep_interval=10
  assids="$(ntpq -c associations | awk '{print $2}' | \
            tr -cd '[:digit:]\n' | tr '\n' ' ')"

  # Speed improvement / optimization - perform reverse DNS lookups up front
  server_alias_list=()
  s_idx=0
  for server in $ntp_servers; do
    # Don't stop on DNS lookup failure
    dns_info="$(nslookup "$server")" || true
    # First see if forward lookup info is there (applicable to FQDNs), then
    # see if reverse lookup info is there (applicable to IPs); mark as DNS
    # lookup failure otherwise. TODO: Change to more robust dig commands.
    #
    # Emptiness is tested explicitly: the exit status of these pipelines is
    # that of their last stage, so it cannot signal "no match".
    server_alias="$(echo "$dns_info" | grep Name: | head -1 | \
                    awk '{print $2}')" || true
    if [[ -z $server_alias ]]; then
      # Reverse lookups end the name with a trailing dot; strip it.
      server_alias="$(echo "$dns_info" | grep 'name = ' | head -1 | \
                      awk '{print $4}' | sed 's/.$//')" || true
    fi
    server_alias_list[$s_idx]="${server_alias:-DNS_LOOKUP_FAILED}"
    s_idx=$((s_idx + 1))
  done

  # Speed improvement / optimization - perform mreadvar of srcadr up front,
  # because each call performs a reverse DNS lookup, which can take a long
  # time if preferred DNS servers in /etc/resolv.conf are hitting their
  # timeout before trying the next one.
  assid_srcadr_list=()
  a_idx=0
  for assid in $assids; do
    if [[ -n $assid ]]; then
      assid_srcadr_list[$a_idx]="$(ntpq -c "mreadvar $assid $assid srcadr" | cut -d'=' -f2)"
      a_idx=$((a_idx + 1))
    fi
  done

  # For every association, find the configured server it belongs to (by
  # address or by DNS alias) and run the health checks against it.
  a_idx=0
  for assid in $assids; do
    if [[ -n $assid ]]; then
      ntpq_reported_srcadr="${assid_srcadr_list[$a_idx]}"
      a_idx=$((a_idx + 1))
      s_idx=0
      for server in $ntp_servers; do
        server_alias="${server_alias_list[$s_idx]}"
        s_idx=$((s_idx + 1))
        if [[ $ntpq_reported_srcadr = "$server" ]] || \
           [[ $ntpq_reported_srcadr = "$server_alias" ]]; then
          stratum="$(ntpq -c "mreadvar $assid $assid stratum" | cut -d'=' -f2)"
          if [[ $stratum -gt $NTPD_SERVER_STRATUM_THRESHOLD ]]; then
            echo "NTP server '$server' with stratum '$stratum' did not meet the stratum threshold '$NTPD_SERVER_STRATUM_THRESHOLD'"
            echo "If you're certain you wish to use this time server, then increase the NTPD_SERVER_STRATUM_THRESHOLD."
            echo "NTP server exceeds defined stratum threshold."
            continue
          fi

          # Wait for reach to climb to the threshold.
          failed_server=false
          while [[ $(ntpq -c "mreadvar $assid $assid reach" | cut -d'=' -f2) -lt \
                   $NTPD_SERVER_REACH_THRESHOLD ]]; do
            if [[ $time_waited -ge $NTPD_HEALTH_TIMEOUT ]]; then
              echo "NTP server '$server' did not achieve desired reach within timeout period '$NTPD_HEALTH_TIMEOUT' seconds."
              failed_server=true
              break
            fi
            echo "NTP reach threshold not achieved yet for '$server'"
            echo "reach is '$(ntpq -c "mreadvar $assid $assid reach" | cut -d'=' -f2)', expected >= '$NTPD_SERVER_REACH_THRESHOLD'"
            echo "Waiting up to '$NTPD_HEALTH_TIMEOUT's for reach >= threshold value."
            sleep $sleep_interval
            time_waited=$((time_waited + sleep_interval))
          done
          if [[ $failed_server = true ]]; then continue; fi

          # use bc for floating point comparison
          # With bc output, 0 = false, 1 = true (opposite of return codes)
          while [[ $(echo "$(ntpq -c "mreadvar $assid $assid jitter" | cut -d'=' -f2) <= $NTPD_SERVER_JITTER_THRESHOLD" | bc -l) = 0 ]]; do
            if [[ $time_waited -ge $NTPD_HEALTH_TIMEOUT ]]; then
              echo "Jitter is above threshold."
              echo "A high jitter indicates an inability to converge time with this time server."
              echo "IN MOST CASES, ISSUE WILL RESOLVE WITHIN A COUPLE OF HOURS - monitor for decreasing jitter values with 'ntpq -p'."
              echo "If error persists, troubleshoot the network connection to timeserver."
              echo "NTP server '$server' did not achieve desired jitter within timeout period '$NTPD_HEALTH_TIMEOUT' seconds."
              failed_server=true
              break
            fi
            echo "NTP jitter threshold not achieved yet for '$server'"
            echo "jitter is '$(ntpq -c "mreadvar $assid $assid jitter" | cut -d '=' -f2)', expected <= '$NTPD_SERVER_JITTER_THRESHOLD'"
            echo "Waiting up to '$NTPD_HEALTH_TIMEOUT's for jitter <= threshold value."
            sleep $sleep_interval
            time_waited=$((time_waited + sleep_interval))
          done
          if [[ $failed_server = true ]]; then continue; fi

          # use bc for floating point comparison
          # With bc output, 0 = false, 1 = true (opposite of return codes)
          # sqrt(x^2) used to get absolute value, since offset may be + or -
          while [[ $(echo "sqrt($(ntpq -c "mreadvar $assid $assid offset" | cut -d '=' -f2)^2) <= $NTPD_SERVER_OFFSET_THRESHOLD" | bc -l) = 0 ]]; do
            if [[ $time_waited -ge $NTPD_HEALTH_TIMEOUT ]]; then
              echo "NTP server '$server' did not achieve desired offset within timeout period '$NTPD_HEALTH_TIMEOUT' seconds."
              failed_server=true
              break
            fi
            echo "NTP offset threshold not achieved yet for '$server'"
            echo "offset is '$(ntpq -c "mreadvar $assid $assid offset" | cut -d'=' -f2)', expected <= '$NTPD_SERVER_OFFSET_THRESHOLD'"
            echo "Waiting up to '$NTPD_HEALTH_TIMEOUT's for offset <= threshold value."
            sleep $sleep_interval
            time_waited=$((time_waited + sleep_interval))
          done
          if [[ $failed_server = true ]]; then continue; fi

          ntp_server_healthy_count=$((ntp_server_healthy_count + 1))
        fi
        # Break out once threshold is met
        if [[ $ntp_server_healthy_count -ge $NTPD_SERVER_COUNT_THRESHOLD ]]; then
          break
        fi
      done
    fi
    # Break out once threshold is met
    if [[ $ntp_server_healthy_count -ge $NTPD_SERVER_COUNT_THRESHOLD ]]; then
      break
    fi
  done

  if [[ $ntp_server_healthy_count -lt $NTPD_SERVER_COUNT_THRESHOLD ]]; then
    echo "Expected '$NTPD_SERVER_COUNT_THRESHOLD' healthy NTP servers, but got '$ntp_server_healthy_count'"
    die "Did not satisfy the minimum number of healthy NTP servers. See https://wiki.web.att.com/display/CCPdev/Network+Cloud+Troubleshooting#NetworkCloudTroubleshooting-TroubleshootingTimeSyncIssues"
  fi
fi
|
||||
|
|
Loading…
Reference in New Issue