diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f88d3b6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,77 @@ +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg* +*.egg-info +dist +build +eggs +parts +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +cover/ +.coverage* +!.coveragerc +.tox +nosetests.xml +.testrepository +.venv + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Complexity +output/*.html +output/*/index.html + +# Sphinx +doc/build + +# pbr generates these +AUTHORS +ChangeLog + +# Editors +*~ +.*.swp +.*sw? + +# Files created by releasenotes build +releasenotes/build + +# Dev tools +.idea/ +**/.vagrant +**/*.log + +# Helm internals +*.lock +*/*.lock +*.tgz +**/*.tgz +**/_partials.tpl +**/_globals.tpl + +# Gate and Check Logs +logs/ + +*/requirements.lock +*/charts +*/charts/* diff --git a/.gitreview b/.gitreview new file mode 100644 index 0000000..368e900 --- /dev/null +++ b/.gitreview @@ -0,0 +1,4 @@ +[gerrit] +host=review.gerrithub.io +port=29418 +project=att-comdev/divingbell diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..aa86e6f --- /dev/null +++ b/Makefile @@ -0,0 +1,44 @@ +# Copyright 2017 The Openstack-Helm Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +HELM := helm +TASK := build + +EXCLUDES := helm-toolkit doc tests tools logs +CHARTS := helm-toolkit $(filter-out $(EXCLUDES), $(patsubst %/.,%,$(wildcard */.))) + +all: $(CHARTS) + +$(CHARTS): + @echo + @echo "===== Processing [$@] chart =====" + @make $(TASK)-$@ + +init-%: + if [ -f $*/Makefile ]; then make -C $*; fi + if [ -f $*/requirements.yaml ]; then helm dep up $*; fi + +lint-%: init-% + if [ -d $* ]; then $(HELM) lint $*; fi + +build-%: lint-% + if [ -d $* ]; then $(HELM) package $*; fi + +clean: + @echo "Removed .b64, _partials.tpl, and _globals.tpl files" + rm -rf helm-toolkit/secrets/*.b64 + rm -rf */templates/_partials.tpl + rm -rf */templates/_globals.tpl + +.PHONY: $(EXCLUDES) $(CHARTS) diff --git a/README.md b/README.md index 0cddf3f..6acaca7 100644 --- a/README.md +++ b/README.md @@ -1 +1,178 @@ -# divingbell \ No newline at end of file +Divingbell +========== + +What is it? +----------- + +Divingbell is a lightweight solution for: +1. Bare metal configuration management for a few very targeted use cases +2. Bare metal package manager orchestration + +What problems does it solve? +---------------------------- + +The needs identified for Divingbell were: +1. To plug gaps in day 1 tools (e.g., drydock) for node configuration +2. To provide a day 2 solution for managing these configurations going forward +3. [Future] To provide a day 2 solution for system level host patching + +Design and Implementation +------------------------- + +Divingbell daemonsets run as priviledged containers which mount the host +filesystem and chroot into that filesystem to enforce configuration and package +state. 
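Each configuration script added in this change follows the same write-then-chroot pattern. The sketch below is illustrative only (it assumes the chart default of mounting the host root at `/mnt`; the script name is hypothetical) and shows the general shape of what the templates later in this patch do:

``` bash
# Sketch of the pattern the chart's templates use (not a real file from the
# chart): the privileged pod writes a script through the host bind mount,
# then chroots into the host filesystem to run it against the host.
cat <<'EOF' > /mnt/tmp/divingbell_example.sh
#!/bin/bash
# Runs against the host's /proc and /etc, not the container's
sysctl -w net.ipv4.ip_forward=1
EOF
chmod 755 /mnt/tmp/divingbell_example.sh
chroot /mnt /tmp/divingbell_example.sh
```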
(The [diving bell](http://bit.ly/2hSXlai) analogue can be thought of as something that descends
+into the deeps to facilitate work done down below the surface.)
+
+We use the daemonset construct as a way of getting a copy of each pod on every
+node, but the work done by this chart's pods behaves like an event-driven job.
+In practice this means that the chart internals run once on pod startup,
+followed by an infinite sleep such that the pods always report a "Running"
+status that k8s recognizes as the healthy (expected) result for a daemonset.
+
+In order to keep configuration as isolated as possible from other systems that
+manage common files like /etc/fstab and /etc/sysctl.conf, Divingbell daemonsets
+manage all of their configuration in separate files (e.g. by writing unique
+files to /etc/sysctl.d or defining unique systemd units) to avoid potential
+conflicts.
+
+To maximize robustness and utility, the daemonsets in this chart are made to be
+idempotent. In addition, they are designed to implicitly restore the original
+system state after previously defined states are undefined (e.g., removing a
+previously defined mount from the yaml manifest, with no record of the original
+mount in the updated manifest).
+
+Lifecycle management
+--------------------
+
+This chart's daemonsets will be spawned by Armada. They run in an event-driven
+fashion: the idempotent automation for each daemonset will only re-run when
+Armada spawns/respawns the container, or if information relevant to the host
+changes in the configmap.
+
+For upgrades, a decision was taken not to use any of the built-in Kubernetes
+update strategies such as RollingUpdate. Instead, we are putting this on
+Armada to handle the orchestration of how to do upgrades (e.g., rack by rack).
+
+Daemonset configs
+-----------------
+
+### sysctl ###
+
+Used to manage host level sysctl tunables. Ex:
+
+``` yaml
+conf:
+  sysctl:
+    net/ipv4/ip_forward: 1
+    net/ipv6/conf/all/forwarding: 1
+```
+
+### mounts ###
+
+Used to manage host level mounts (outside of those in /etc/fstab). Ex:
+
+``` yaml
+conf:
+  mounts:
+    mnt:
+      mnt_tgt: /mnt
+      device: tmpfs
+      type: tmpfs
+      options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=1024M'
+```
+
+### ethtool ###
+
+Used to manage host level NIC tunables. Ex:
+
+``` yaml
+conf:
+  ethtool:
+    ens3:
+      tx-tcp-segmentation: off
+      tx-checksum-ip-generic: on
+```
+
+### packages ###
+
+Not implemented
+
+### users ###
+
+Not implemented
+
+Node specific configurations
+----------------------------
+
+Although we expect these daemonsets to run indiscriminately on all nodes in the
+infrastructure, we also expect that different nodes will need to be given a
+different set of data depending on the node role/function. This chart supports
+establishing value overrides for nodes with specific label value pairs and for
+targeting nodes with specific hostnames. The overridden configuration is merged
+with the normal config data, with the override data taking precedence.
+
+The chart will then generate one daemonset for each host and label override, in
+addition to a default daemonset for which no overrides are applied.
+Each daemonset generated will also exclude from its scheduling criteria all
+other hosts and labels defined in other overrides for the same daemonset, to
+ensure that there is no overlap of daemonsets (i.e., one and only one daemonset
+of a given type for each node).
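To make the scheduling exclusion concrete, the snippet below sketches the kind of node affinity the chart renders for a label-override daemonset (the label and hostname values are illustrative and follow the overrides example below; the exact expression ordering may differ): the daemonset's own label is matched with `In`, while hosts and labels claimed by other overrides are excluded with `NotIn`.

``` yaml
affinity:
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
      - matchExpressions:
        - key: compute_type
          operator: In
          values:
          - "dpdk"
          - "sriov"
        - key: kubernetes.io/hostname
          operator: NotIn
          values:
          - "superhost"
```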
+ +Overrides example with sysctl daemonset: + +``` yaml +conf: + sysctl: + net.ipv4.ip_forward: 1 + net.ipv6.conf.all.forwarding: 1 + fs.file-max: 9999 + overrides: + divingbell-sysctl: + labels: + - label: + key: compute_type + values: + - "dpdk" + - "sriov" + conf: + sysctl: + net.ipv4.ip_forward: 0 + - label: + key: another_label + values: + - "another_value" + conf: + sysctl: + net.ipv6.conf.all.forwarding: 0 + hosts: + - name: superhost + conf: + sysctl: + net.ipv4.ip_forward: 0 + fs.file-max: 12345 + - name: superhost2 + conf: + sysctl: + fs.file-max: 23456 +``` + +Caveats: +1. For a given node, at most one override operation applies. If a node meets +override criteria for both a label and a host, then the host overrides take +precedence and are used for that node. The label overrides are not used in this +case. This is especially important to note if you are defining new host +overrides for a node that is already consuming matching label overrides, as +defining a host override would make those label overrides no longer apply. +2. Daemonsets are generated regardless of the current state of the environment. +Ex: If your environment consists of a single node that matches a host override, +the chart will still generate a default daemonset which would fail to schedule +in this example. Likewise if the host or label in the override return no +candidates, these would also fail to schedule. + +Recorded Demo +------------- + +A recorded demo of using divingbell can be found [here](https://asciinema.org/a/beJQZpRPdOctowW0Lxkxrhz17). + + diff --git a/TODO b/TODO new file mode 100644 index 0000000..e3d256c --- /dev/null +++ b/TODO @@ -0,0 +1,6 @@ +1. Put shell error handling into common area (helm-toolkit) +2. Perform revert operations in scripts before applying new settings +3. Refactor .sh into .py +4. Refactor each daemonset to follow the same general systemd pattern; have one +script that can do all of them to reduce code burden. +5. yaml validation: check for duplicate items, unused leaves, etc. diff --git a/divingbell/Chart.yaml b/divingbell/Chart.yaml new file mode 100644 index 0000000..0b9e475 --- /dev/null +++ b/divingbell/Chart.yaml @@ -0,0 +1,18 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +description: divingbell +name: divingbell +version: 0.1.0 diff --git a/divingbell/requirements.yaml b/divingbell/requirements.yaml new file mode 100644 index 0000000..7496230 --- /dev/null +++ b/divingbell/requirements.yaml @@ -0,0 +1,18 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +dependencies: + - name: helm-toolkit + repository: http://localhost:8879/charts + version: 0.1.0 diff --git a/divingbell/templates/bin/_ethtool.sh.tpl b/divingbell/templates/bin/_ethtool.sh.tpl new file mode 100644 index 0000000..b661743 --- /dev/null +++ b/divingbell/templates/bin/_ethtool.sh.tpl @@ -0,0 +1,225 @@ +#!/bin/bash + +{{/* +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/}} + +set -e + +cat <<'EOF' > {{ .chroot_mnt_path | quote }}/tmp/ethtool_host.sh +{{ include "divingbell.shcommon" . }} + +old_ethtool_path='/var/divingbell/ethtool' +persist_path='/etc/systemd/system' + +if [ ! -d "${old_ethtool_path}" ]; then + mkdir -p "${old_ethtool_path}" +fi + +write_test "${old_ethtool_path}" +write_test "${persist_path}" + +validate_operation(){ + local param="${1}" + shift + [ "${param}" = 'on' ] || [ "${param}" = 'off' ] || + die "Expected 'on' or 'off', got '${param}' $@" +} + +ethtool_bin="$(type -p ethtool)" + +add_ethtool_param(){ + die_if_null "${device}" ", 'device' env var not initialized" + ifconfig "${device}" > /dev/null # verify interface is here + die_if_null "${user_key}" ", 'user_key' env var not initialized" + die_if_null "${user_val}" ", 'user_val' env var not initialized" + # YAML parser converts unquoted 'on' and 'off' to boolean values + # ethtool only works with 'on' and 'off', not 'true' or 'false' + if [ "${user_val}" = 'true' ]; then + user_val='on' + elif [ "${user_val}" = 'false' ]; then + user_val='off' + fi + validate_operation "${user_val}" + : ${before:=docker.service} + : ${after=network-online.target} + + # Call systemd-escapae to get systemd required filename + local systemd_name + systemd_name="$(systemd-escape \ + -p --suffix=service "${device}.${user_key}")" + + # look for user requested value for this device + local param_data + param_data="$(${ethtool_bin} -k ${device} | grep "${user_key}:")" || + die "Could not find requested param ${user_key} for ${device}" + + local audit_item + audit_item="${device},${user_key},${user_val}" + audit_items="${audit_items}${audit_item}"$'\n' + + # extract existing setting for device + local current_val_raw + current_val_raw="$(echo "${param_data}" | cut -d':' -f2)" + [ "$(echo "${current_val_raw}" | wc -l)" -le 1 ] || + die "More than one match for '${user_key}'" + [[ ! 
${current_val_raw} = *fixed* ]] ||
+    die "'${device}' does not permit changing the '${user_key}' setting"
+  if [[ ${current_val_raw} = *off\ \[requested\ on\] ]]; then
+    current_val_raw='off'
+  elif [[ ${current_val_raw} = *on\ \[requested\ off\] ]]; then
+    current_val_raw='on'
+  fi
+  local current_val
+  current_val="$(echo "${current_val_raw}" |
+    cut -d':' -f2 | tr -d '[:space:]')"
+  die_if_null "${current_val}" "Value parse error on '${param_data}'"
+  validate_operation "${current_val}" "for '${user_key}' on '${device}'"
+
+  # Write the original system setting for this device parameter
+  local path_to_orig_val
+  path_to_orig_val="${old_ethtool_path}/${systemd_name}"
+  if [ ! -f "${path_to_orig_val}" ]; then
+    echo "${device} ${user_key} ${current_val}" > "${path_to_orig_val}"
+  fi
+
+  # Read the original system setting for this device parameter and use it to
+  # build the service 'stop' command (i.e. revert to original state)
+  local stop_val
+  stop_val="$(cat "${path_to_orig_val}" | cut -d' ' -f3)"
+  validate_operation "${stop_val}" "from '${path_to_orig_val}'"
+  local stop_cmd
+  stop_cmd="${ethtool_bin} -K ${device} ${user_key} ${stop_val}"
+
+  # Build service start command
+  local start_cmd
+  start_cmd="${ethtool_bin} -K ${device} ${user_key} ${user_val}"
+
+  # Build the systemd unit file
+  file_content="[Unit]
+Before=${before}
+After=${after}
+
+[Service]
+ExecStart=${start_cmd}
+#ExecStop=${stop_cmd}
+
+[Install]
+WantedBy=multi-user.target"
+
+  local systemd_path="${persist_path}/${systemd_name}"
+  local restart_service=''
+  local service_updates=''
+
+  if [ ! -f "${systemd_path}" ] ||
+     [ "$(cat ${systemd_path})" != "${file_content}" ]
+  then
+    echo "${file_content}" > "${systemd_path}"
+    restart_service=true
+    service_updates=true
+    systemctl daemon-reload
+  fi
+
+  if [ "${current_val}" != "${user_val}" ]; then
+    restart_service=true
+  fi
+
+  if [ -n "${restart_service}" ]; then
+    systemctl restart "${systemd_name}" || die "Start failed: ${systemd_name}"
+  fi
+
+  # Mark the service for auto-start on boot
+  systemctl is-enabled "${systemd_name}" > /dev/null ||
+    systemctl enable "${systemd_name}" ||
+    die "systemd persist failed: ${systemd_name}"
+
+  log.INFO "Service successfully verified: ${systemd_name}"
+
+  curr_ethtool="${curr_ethtool}${systemd_name}"$'\n'
+}
+
+{{- range $iface, $unused := .ethtool }}
+  {{- range $ethtool_key, $ethtool_val := . }}
+    device={{ $iface | quote }} \
+    user_key={{ $ethtool_key | quote }} \
+    user_val={{ $ethtool_val | quote }} \
+    add_ethtool_param
+  {{- end }}
+{{- end }}
+
+# TODO: This should be done before applying new settings rather than after
+# Revert any previously applied services which are now absent
+prev_files="$(find "${old_ethtool_path}" -type f)"
+if [ -n "${prev_files}" ]; then
+  basename -a ${prev_files} | sort > /tmp/prev_ethtool
+  echo "${curr_ethtool}" | sort > /tmp/curr_ethtool
+  revert_list="$(comm -23 /tmp/prev_ethtool /tmp/curr_ethtool)"
+  IFS=$'\n'
+  for prev_setting in ${revert_list}; do
+    unset IFS
+    args="$(cat "${old_ethtool_path}/${prev_setting}")"
+    set -- $args
+    ${ethtool_bin} -K "$@"
+    if [ -f "${persist_path}/${prev_setting}" ]; then
+      systemctl disable "${prev_setting}"
+      rm "${persist_path}/${prev_setting}"
+    fi
+    rm "${old_ethtool_path}/${prev_setting}"
+    log.INFO "Reverted ethtool settings: ${prev_setting}"
+  done
+fi
+
+# Perform another pass on ethtool settings to identify any conflicting settings
+# among those specified by the user.
Enabling/disabling some NIC settings will +# implicitly enable/disable others. Ethtool reports conflicts for such +# parameters as 'off [requested on]' and 'on [requested off]' +for audit_item in ${audit_items}; do + device="$(echo "${audit_item}" | cut -d',' -f1)" + user_key="$(echo "${audit_item}" | cut -d',' -f2)" + user_val="$(echo "${audit_item}" | cut -d',' -f3)" + param_data="$(${ethtool_bin} -k ${device} | grep "${user_key}:")" + current_val="$(echo "${param_data}" | cut -d':' -f2 | tr -d '[:space:]')" + if [[ ${current_val} != ${user_val}* ]]; then + if [[ ${param_data} = *\[requested\ on\] ]] || + [[ ${param_data} = *\[requested\ off\] ]] + then + log.ERROR 'There is a conflict between settings chosen for this device.' + fi + die "Validation failure: Requested '${user_key}' to be set to" \ + "'${user_val}' on '${device}'; got '${param_data}'." + fi +done + +if [ -n "${curr_ethtool}" ]; then + log.INFO 'All ethtool successfully validated on this node.' +else + log.WARN 'No ethtool overrides defined for this node.' +fi + +exit 0 +EOF + +chmod 755 {{ .chroot_mnt_path | quote }}/tmp/ethtool_host.sh +chroot {{ .chroot_mnt_path | quote }} /tmp/ethtool_host.sh + +sleep 1 +echo 'INFO Putting the daemon to sleep.' + +while [ 1 ]; do + sleep 300 +done + +exit 0 + diff --git a/divingbell/templates/bin/_mounts.sh.tpl b/divingbell/templates/bin/_mounts.sh.tpl new file mode 100644 index 0000000..ec14ff3 --- /dev/null +++ b/divingbell/templates/bin/_mounts.sh.tpl @@ -0,0 +1,155 @@ +#!/bin/bash + +{{/* +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/}} + +set -e + +cat <<'EOF' > {{ .chroot_mnt_path | quote }}/tmp/mounts_host.sh +{{ include "divingbell.shcommon" . }} + +old_mounts_path='/var/divingbell/mounts' +persist_path='/etc/systemd/system' + +if [ ! -d "${old_mounts_path}" ]; then + mkdir -p "${old_mounts_path}" +fi + +write_test "${old_mounts_path}" +write_test "${persist_path}" + +add_mounts_param(){ + die_if_null "${device}" ", 'device' env var not initialized" + die_if_null "${mnt_tgt}" ", 'mnt_tgt' env var not initialized" + die_if_null "${type}" ", 'type' env var not initialized" + : ${options:=None} + : ${before:=docker.service} + : ${after=network-online.target} + + # Create mount target + if [ ! -d "${mnt_tgt}" ]; then + mkdir -p "${mnt_tgt}" + fi + + # Call systemd-escapae to get systemd required filename for the mount + local systemd_name + systemd_name="$(systemd-escape -p --suffix=mount "${mnt_tgt}")" + + # Prepare systemd entry + + local mnt_opts_systemd='' + if [ ! "${options}" = 'None' ]; then + mnt_opts_systemd="Options=${options}" + fi + + file_content="[Unit] +Conflicts=umount.target +Before=${before} +After=${after} + +[Mount] +What=${device} +Where=${mnt_tgt} +Type=${type} +${mnt_opts_systemd} + +[Install] +WantedBy=local-fs.target" + + local mountfile_path="${persist_path}/${systemd_name}" + local restart_mount='' + local mnt_updates='' + + if [ ! 
-f "${mountfile_path}" ] || + [ "$(cat ${mountfile_path})" != "${file_content}" ] + then + echo "${file_content}" > "${mountfile_path}" + restart_mount=true + mnt_updates=true + systemctl daemon-reload + fi + + systemctl is-active "${systemd_name}" > /dev/null || restart_mount=true + + # Perform the mount + if [ -n "${restart_mount}" ]; then + systemctl restart "${systemd_name}" || die "Mount failed: ${systemd_name}" + fi + + # Mark the mount for auto-start on boot + systemctl is-enabled "${systemd_name}" > /dev/null || + systemctl enable "${systemd_name}" || + die "Mount persisting failed: ${systemd_name}" + + # Store orchestrated mount info in the event the mount is + # later reverted (removed) from the configmap + if [ -n "${mnt_updates}" ]; then + cp "${mountfile_path}" "${old_mounts_path}" + fi + + log.INFO "Mount successfully verified: ${mnt_tgt}" + + curr_mounts="${curr_mounts}${systemd_name}"$'\n' +} + +{{- range .mounts }} + {{- range $key, $value := . }} + {{ $key }}={{ $value | quote }} \ + {{- end }} + add_mounts_param +{{- end }} + +# TODO: We should purge all old mounts first (umount them) before applying +# new mounts +# Revert any previously applied mounts which are now absent +prev_files="$(find "${old_mounts_path}" -type f)" +if [ -n "${prev_files}" ]; then + basename -a ${prev_files} | sort > /tmp/prev_mounts + echo "${curr_mounts}" | sort > /tmp/curr_mounts + revert_list="$(comm -23 /tmp/prev_mounts /tmp/curr_mounts)" + IFS=$'\n' + for prev_mount in ${revert_list}; do + if [ -f "${persist_path}/${prev_mount}" ]; then + systemctl stop "${prev_mount}" + systemctl disable "${prev_mount}" + rm "${persist_path}/${prev_mount}" + fi + rm "${old_mounts_path}/${prev_mount}" + log.INFO "Reverted mount: ${prev_mount}" + done +fi + +if [ -n "${curr_mounts}" ]; then + log.INFO 'All mounts successfully validated on this node.' +else + log.WARN 'No mounts defined for this node.' +fi + +exit 0 +EOF + +chmod 755 {{ .chroot_mnt_path | quote }}/tmp/mounts_host.sh +chroot {{ .chroot_mnt_path | quote }} /tmp/mounts_host.sh + +sleep 1 +echo 'INFO Putting the daemon to sleep.' 
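+# NOTE: the configuration work for this daemonset is already complete at this
+# point; the loop below sleeps forever so the pod keeps reporting a "Running"
+# status, which k8s treats as healthy for a daemonset (see the README,
+# "Design and Implementation").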
+ +while [ 1 ]; do + sleep 300 +done + +exit 0 + diff --git a/divingbell/templates/bin/_shcommon.tpl b/divingbell/templates/bin/_shcommon.tpl new file mode 100644 index 0000000..6310f53 --- /dev/null +++ b/divingbell/templates/bin/_shcommon.tpl @@ -0,0 +1,109 @@ +{{- define "divingbell.shcommon" -}} +#!/bin/bash + +set -o errtrace +set -o pipefail + +declare -Ax __log_types=( +{{- if .log_colors }} + [ERROR]='fd=2, color=\e[01;31m' + [TRACE]='fd=2, color=\e[01;31m' + [WARN]='fd=1, color=\e[01;93m' + [INFO]='fd=1, color=\e[01;37m' + [DEBUG]='fd=1, color=\e[01;90m' +{{- else }} + [ERROR]='fd=2,' + [TRACE]='fd=2,' + [WARN]='fd=1,' + [INFO]='fd=1,' + [DEBUG]='fd=1,' +{{- end }} +) +for __log_type in "${!__log_types[@]}"; do + alias log.${__log_type}="echo ${__log_type}" +done +shopt -s expand_aliases + +__text_formatter(){ + local log_prefix='None' + local default_log_type='INFO' + local default_xtrace_type='DEBUG' + local log_type + local color_prefix + local fd + for log_type in "${!__log_types[@]}"; do + if [[ ${1} == ${log_type}* ]]; then + log_prefix='' + color_prefix="$(echo ${__log_types["${log_type}"]} | + cut -d',' -f2 | cut -d'=' -f2)" + fd="$(echo ${__log_types["${log_type}"]} | + cut -d',' -f1 | cut -d'=' -f2)" + break + fi + done + if [ "${log_prefix}" = "None" ]; then + # xtrace output usually begins with "+" or "'", mark as debug + if [[ ${1} = '+'* ]] || [[ ${1} = \'* ]]; then + log_prefix="${default_xtrace_type} " + log_type="${default_xtrace_type}" + else + log_prefix="${default_log_type} " + log_type="${default_log_type}" + fi + color_prefix="$(echo ${__log_types["${log_type}"]} | + cut -d',' -f2 | cut -d'=' -f2)" + fd="$(echo ${__log_types["${log_type}"]} | + cut -d',' -f1 | cut -d'=' -f2)" + fi + local color_suffix='' + if [ -n "${color_prefix}" ]; then + color_suffix='\e[0m' + fi + echo -e "${color_prefix}${log_prefix}${1}${color_suffix}" >&${fd} +} +# Due to this unresolved issue: http://bit.ly/2xPmOY9 we choose preservation of +# message ordering at the expense of applying appropriate tags to stderr. As a +# result, stderr from subprocesses will still display as INFO level messages. +# However we can still log ERROR messages using the aliased log handlers. +exec >& >(while read line; do + if [ "${line}" = '__EXIT_MARKER__' ]; then + break + else + __text_formatter "${line}" + fi + done) + +die(){ + set +x + # write to stderr any passed error message + if [[ $@ = *[!\ ]* ]]; then + log.ERROR "$@" + fi + log.TRACE "Backtrace:" + for ((i=0;i<${#FUNCNAME[@]}-1;i++)); do + log.TRACE $(caller $i) + done + echo __EXIT_MARKER__ + # Exit after pipe closes to ensure all output is flushed first + while : ; do + echo "Waiting on exit..." || exit 1 + done +} +export -f die +trap 'die' ERR +set -x + +write_test(){ + touch "${1}/__write_test" && + rm "${1}/__write_test" || + die "Write test to ${1} failed." +} + +die_if_null(){ + local var="${1}" + shift + [ -n "${var}" ] || die "Null variable exception $@" +} + +############################################################################### +{{- end -}} diff --git a/divingbell/templates/bin/_sysctl.sh.tpl b/divingbell/templates/bin/_sysctl.sh.tpl new file mode 100644 index 0000000..3da3521 --- /dev/null +++ b/divingbell/templates/bin/_sysctl.sh.tpl @@ -0,0 +1,139 @@ +#!/bin/bash + +{{/* +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +*/}} + +set -e + +cat <<'EOF' > {{ .chroot_mnt_path | quote }}/tmp/sysctl_host.sh +{{ include "divingbell.shcommon" . }} + +# TODO: Make prefix configurable to control param loading order +fname_prefix='60-divingbell-' +defaults_path='/var/divingbell/sysctl' +persist_path='/etc/sysctl.d' +reload_system_configs=false + +if [ ! -d "${defaults_path}" ]; then + mkdir -p "${defaults_path}" +fi + +write_test "${defaults_path}" +write_test "${persist_path}" + +add_sysctl_param(){ + local user_key="${1}" + die_if_null "${user_key}" ", 'user_key' not supplied to function" + local user_val="${2}" + die_if_null "${user_val}" ", 'user_val' not supplied to function" + + # Try reading the current sysctl tunable param / value + # If sysctl cannot find the specified tunable, script will exit here + local system_key_val_pair + system_key_val_pair="$(sysctl $user_key)" + + # For further operation, use the tunable name returned by sysctl above, + # rather than the one specified by the user. + # sysctl gives a consistently formatted tunable (e.g., net.ipv4.ip_forward) + # regardless of input format (e.g., net/ipv4/ip_forward). + local system_key + system_key="$(echo ${system_key_val_pair} | + cut -d'=' -f1 | tr -d '[:space:]')" + [ -n "${system_key}" ] || die 'Null variable exception' + + # Store current kernel sysctl default in the event we need to restore later + # But only if it is the first time we are changing the tunable, + # to capture the orignal value. + local system_val + system_val="$(echo ${system_key_val_pair} | + cut -d'=' -f2 | tr -d '[:space:]')" + [ -n "${system_val}" ] || die 'Null variable exception' + local orig_val="${defaults_path}/${fname_prefix}${system_key}.conf" + if [ ! -f "${orig_val}" ]; then + echo "${system_key_val_pair}" > "${orig_val}" + fi + + # Apply new setting. If an invalid value were provided, sysctl would choke + # here, before making the change persistent. + if [ "${user_val}" != "${system_val}" ]; then + sysctl -w "${system_key}=${user_val}" + fi + + # Persist the new setting + file_content="${system_key}=${user_val}" + file_path="${persist_path}/${fname_prefix}${system_key}.conf" + if [ -f "${file_path}" ] && + [ "$(cat ${file_path})" != "${file_content}" ] || + [ ! 
-f "${file_path}" ] + then + echo "${file_content}" > "${file_path}" + reload_system_configs=true + log.INFO "Sysctl setting applied: ${system_key}=${user_val}" + else + log.INFO "No changes made to sysctl param: ${system_key}=${user_val}" + fi + + curr_settings="${curr_settings}${fname_prefix}${system_key}.conf"$'\n' +} + +{{- range $key, $value := .sysctl }} +add_sysctl_param {{ $key | quote }} {{ $value | quote }} +{{- end }} + +# Revert any previously applied sysctl settings which are now absent +prev_files="$(find "${defaults_path}" -type f)" +if [ -n "${prev_files}" ]; then + basename -a ${prev_files} | sort > /tmp/prev_settings + echo "${curr_settings}" | sort > /tmp/curr_settings + revert_list="$(comm -23 /tmp/prev_settings /tmp/curr_settings)" + IFS=$'\n' + for orig_sysctl_setting in ${revert_list}; do + rm "${persist_path}/${orig_sysctl_setting}" + sysctl -p "${defaults_path}/${orig_sysctl_setting}" + rm "${defaults_path}/${orig_sysctl_setting}" + reload_system_configs=true + log.INFO "Reverted sysctl setting:" \ + "$(cat "${defaults_path}/${orig_sysctl_setting}")" + done +fi + +# Final validation of sysctl settings written to /etc/sysctl.d +# Also allows for nice play with other automation (or manual) systems that +# may have separate overrides for reverted tunables. +if [ "${reload_system_configs}" = "true" ]; then + sysctl --system +fi + +if [ -n "${curr_settings}" ]; then + log.INFO 'All sysctl configuration successfully validated on this node.' +else + log.WARN 'No syctl overrides defined for this node.' +fi + +exit 0 +EOF + +chmod 755 {{ .chroot_mnt_path | quote }}/tmp/sysctl_host.sh +chroot {{ .chroot_mnt_path | quote }} /tmp/sysctl_host.sh + +sleep 1 +echo 'INFO Putting the daemon to sleep.' + +while [ 1 ]; do + sleep 300 +done + +exit 0 diff --git a/divingbell/templates/daemonsets.yaml b/divingbell/templates/daemonsets.yaml new file mode 100644 index 0000000..89ff97a --- /dev/null +++ b/divingbell/templates/daemonsets.yaml @@ -0,0 +1,252 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +{{- $daemonsets := list "sysctl" "mounts" "ethtool" }} +{{- $envAll := . }} +{{- range $daemonset := $daemonsets }} + + {{- $daemonset_root_name := printf (print $.Chart.Name "-" $daemonset) }} + {{- set $.Values "__daemonset_list" list }} + {{- set $.Values "__default" dict }} + {{- if hasKey $.Values.conf "overrides" }} + {{- range $key, $val := $.Values.conf.overrides }} + + {{- if eq $key $daemonset_root_name }} + {{- range $type, $type_data := . }} + + {{- if eq $type "hosts" }} + {{- range $host_data := . 
}} + # dictionary that will contain all info needed to generate this + # iteration of the daemonset + {{- $current_dict := dict }} + + # generate daemonset name + {{- $name := printf (print $daemonset_root_name "-" $host_data.name) }} + {{- set $current_dict "name" $name }} + + # apply overrides + {{- $override_conf_copy := $host_data.conf }} + # Omit overrides because they are not needed, and + # because it causes a stack overflow during compilation + {{- $root_conf_copy := omit $.Values.conf "overrides" }} + {{- $merged_dict := merge $override_conf_copy $root_conf_copy }} + {{- set $current_dict "nodeData" $merged_dict }} + + # Schedule to this host explicitly. + {{- $nodeSelector_dict := dict }} + + {{- set $nodeSelector_dict "key" "kubernetes.io/hostname" }} + {{- set $nodeSelector_dict "operator" "In" }} + + {{- $values_list := list $host_data.name }} + {{- set $nodeSelector_dict "values" $values_list }} + + {{- $list_aggregate := list $nodeSelector_dict }} + {{- set $current_dict "matchExpressions" $list_aggregate }} + + # store completed daemonset entry/info into global list + {{- $list_aggregate := append $.Values.__daemonset_list $current_dict }} + {{- set $.Values "__daemonset_list" $list_aggregate }} + + {{- end }} + {{- end }} + + {{- if eq $type "labels" }} + {{- range $label_data := . }} + # dictionary that will contain all info needed to generate this + # iteration of the daemonset. + {{- set $.Values "__current_label" dict }} + + # generate daemonset name + {{- $name := printf (print $daemonset_root_name "-" $label_data.label.key) }} + {{- set $.Values.__current_label "name" $name }} + + # apply overrides + {{- $override_conf_copy := $label_data.conf }} + # Omit overrides because they are not needed, and + # because it causes a stack overflow during compilation + {{- $root_conf_copy := omit $.Values.conf "overrides" }} + {{- $merged_dict := merge $override_conf_copy $root_conf_copy }} + {{- set $.Values.__current_label "nodeData" $merged_dict }} + + # Schedule to the provided label value(s) + {{- $label_dict := omit $label_data.label "NULL" }} + {{- set $label_dict "operator" "In" }} + {{- $list_aggregate := list $label_dict }} + {{- set $.Values.__current_label "matchExpressions" $list_aggregate }} + + # Do not schedule to any other specified labels + {{- $other_labels := without $type_data $label_data }} + {{- range $label_data2 := $other_labels }} + {{- $label_dict := omit $label_data2.label "NULL" }} + + {{- set $label_dict "operator" "NotIn" }} + + {{- $list_aggregate := append $.Values.__current_label.matchExpressions $label_dict }} + {{- set $.Values.__current_label "matchExpressions" $list_aggregate }} + {{- end }} + + # Do not schedule to any other specified hosts + {{- range $type, $type_data := $val }} + {{- if eq $type "hosts" }} + {{- range $host_data := . 
}} + {{- $label_dict := dict }} + + {{- set $label_dict "key" "kubernetes.io/hostname" }} + {{- set $label_dict "operator" "NotIn" }} + + {{- $values_list := list $host_data.name }} + {{- set $label_dict "values" $values_list }} + + {{- $list_aggregate := append $.Values.__current_label.matchExpressions $label_dict }} + {{- set $.Values.__current_label "matchExpressions" $list_aggregate }} + {{- end }} + {{- end }} + {{- end }} + + # store completed daemonset entry/info into global list + {{- $list_aggregate := append $.Values.__daemonset_list $.Values.__current_label }} + {{- set $.Values "__daemonset_list" $list_aggregate }} + {{- unset $.Values "__current_label" }} + + {{- end }} + {{- end }} + {{- end }} + + # scheduler exceptions for the default daemonset + {{- set $.Values.__default "matchExpressions" list }} + + {{- range $type, $type_data := . }} + # Do not schedule to other specified labels + {{- if eq $type "labels" }} + {{- range $label_data := . }} + {{- $default_dict := omit $label_data.label "NULL" }} + + {{- set $default_dict "operator" "NotIn" }} + + {{- $list_aggregate := append $.Values.__default.matchExpressions $default_dict }} + {{- set $.Values.__default "matchExpressions" $list_aggregate }} + {{- end }} + {{- end }} + # Do not schedule to other specified hosts + {{- if eq $type "hosts" }} + {{- range $host_data := . }} + {{- $default_dict := dict }} + + {{- set $default_dict "key" "kubernetes.io/hostname" }} + {{- set $default_dict "operator" "NotIn" }} + + {{- $values_list := list $host_data.name }} + {{- set $default_dict "values" $values_list }} + + {{- $list_aggregate := append $.Values.__default.matchExpressions $default_dict }} + {{- set $.Values.__default "matchExpressions" $list_aggregate }} + {{- end }} + {{- end }} + {{- end }} + + {{- end }} + {{- end }} + {{- end }} + + # generate the default daemonset + + # name + {{- $name := printf (print $daemonset_root_name "-default") }} + {{- set $.Values.__default "name" $name }} + + # use values.conf as-is, minus overrides + {{- $root_conf_copy := omit $.Values.conf "overrides" }} + {{- set $.Values.__default "nodeData" $root_conf_copy }} + + # add to global list + {{- $list_aggregate := append $.Values.__daemonset_list $.Values.__default }} + {{- set $.Values "__daemonset_list" $list_aggregate }} + {{- unset $.Values "__default" }} + + {{- range $current_dict := $.Values.__daemonset_list }} + {{- $template_location := printf (print "bin/_" $daemonset ".sh.tpl") | toString -}} + # referenced by helm toolkit + {{- set $current_dict.nodeData "Template" dict -}} + {{- set $current_dict.nodeData.Template "Name" $.Template.Name }} + # name needs to be a DNS-1123 compliant name + # generate a uuid since the version of sprig packaged with helm 2.5.1 does + # not contain regex* functions. As a result we wont use $current_dict.name + {{- $dns_1123_name := printf (print $.Chart.Name "-" $daemonset "-" uuidv4) | lower }} +--- +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + name: {{ $dns_1123_name }} +spec: + template: + metadata: + labels: +{{ list $envAll $.Chart.Name $daemonset | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }} + annotations: + values-hash: {{ list $current_dict.nodeData . 
| quote | sha256sum }} + spec: + {{- if hasKey $current_dict "matchExpressions" }} + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + {{- range $matchExpression := $current_dict.matchExpressions }} + - key: {{ $matchExpression.key }} + operator: {{ $matchExpression.operator }} + values: + {{- range $schedule_val := $matchExpression.values }} + - {{ $schedule_val | quote }} + {{- end }} + {{- end }} + {{- end }} + hostNetwork: true + hostPID: true + hostIPC: true + containers: + - name: {{ $dns_1123_name }} + image: {{ $.Values.images.divingbell }} + imagePullPolicy: {{ $.Values.images.pull_policy }} + command: + - /tmp/{{ $daemonset }}.sh + volumeMounts: + - name: rootfs-{{ $daemonset }} + mountPath: {{ $.Values.conf.chroot_mnt_path }} + - name: {{ $dns_1123_name }} + mountPath: /tmp/{{ $daemonset }}.sh + subPath: {{ $daemonset }} + readOnly: true + securityContext: + privileged: true + volumes: + - name: rootfs-{{ $daemonset }} + hostPath: + path: / + - name: {{ $dns_1123_name }} + configMap: + name: {{ $dns_1123_name }} + defaultMode: 0555 +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ $dns_1123_name }} +data: + {{ $daemonset }}: |+ +{{- tuple $current_dict.nodeData $template_location $current_dict.nodeData | include "helm-toolkit.utils.configmap_templater" | indent 4 }} + {{- end -}} + {{- unset $.Values "__daemonset_list" -}} +{{- end -}} + diff --git a/divingbell/tools/gate/setup.sh b/divingbell/tools/gate/setup.sh new file mode 100755 index 0000000..0f23952 --- /dev/null +++ b/divingbell/tools/gate/setup.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +set -ex + +export TMP_DIR=$(mktemp -d) +cd $TMP_DIR +git clone https://git.openstack.org/openstack/openstack-helm +cd openstack-helm/tools/gate/ +./setup_gate.sh diff --git a/divingbell/tools/gate/test.sh b/divingbell/tools/gate/test.sh new file mode 100755 index 0000000..26093cf --- /dev/null +++ b/divingbell/tools/gate/test.sh @@ -0,0 +1,708 @@ +#!/bin/bash + +# TODO: Convert to use new/common gate scripts when available + +set -e + +NAME=divingbell +: ${LOGS_DIR:=/var/log} +: ${LOGS_SUBDIR:=${LOGS_DIR}/${NAME}/$(date +"%m-%d-%y_%H:%M:%S")} +mkdir -p "${LOGS_SUBDIR}" +LOG_NAME="${LOGS_SUBDIR}/test.log" +TEST_RESULTS="${LOGS_SUBDIR}/results.log" +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" +cd "${DIR}" +BASE_VALS="--values=${DIR}/../../values.yaml" +SYSCTL_KEY1=net.ipv4.conf.all.log_martians +SYSCTL_VAL1_DEFAULT=1 +SYSCTL_KEY2=net.ipv4.conf.all.secure_redirects +SYSCTL_VAL2_DEFAULT=1 +SYSCTL_KEY3=net.ipv4.conf.all.accept_redirects +SYSCTL_VAL3_DEFAULT=0 +SYSCTL_KEY4=net/ipv6/conf/all/accept_redirects +SYSCTL_VAL4_DEFAULT=0 +MOUNTS_SYSTEMD=/${NAME} +MOUNTS_PATH1=${MOUNTS_SYSTEMD}1 +MOUNTS_PATH2=${MOUNTS_SYSTEMD}2 +MOUNTS_PATH3=${MOUNTS_SYSTEMD}3 +ETHTOOL_KEY2=tx-tcp-segmentation +ETHTOOL_VAL2_DEFAULT=on +ETHTOOL_KEY3=tx-tcp6-segmentation +ETHTOOL_VAL3_DEFAULT=on +ETHTOOL_KEY4=tx-nocache-copy +ETHTOOL_VAL4_DEFAULT=off +ETHTOOL_KEY5=tx-checksum-ip-generic +ETHTOOL_VAL5_DEFAULT=on +nic_info="$(lshw -class network)" +physical_nic='' +IFS=$'\n' +for line in ${nic_info}; do + if [[ ${line} = *'physical id:'* ]]; then + physical_nic=true + fi + if [ "${physical_nic}" = 'true' ] && [[ ${line} = *'logical name'* ]]; then + DEVICE="$(echo "${line}" | cut -d':' -f2 | tr -d '[:space:]')" + echo "Found deivce: '${DEVICE}' to use for ethtool testing" + break + fi +done +[ -n "${DEVICE}" ] || (echo Could not find physical NIC for tesing; exit 1) + +exec >& >(while read 
line; do echo "${line}" | sudo tee -a ${LOG_NAME}; done) + +set -x + +purge_containers(){ + local chart_status="$(helm list ${NAME})" + if [ -n "${chart_status}" ]; then + helm delete --purge ${NAME} + fi +} + +__set_systemd_name(){ + if [ "${2}" = 'mount' ]; then + SYSTEMD_NAME="$(systemd-escape -p --suffix=mount "${1}")" + else + SYSTEMD_NAME="$(systemd-escape -p --suffix=service "${1}")" + fi +} + +_teardown_systemd(){ + __set_systemd_name "${1}" "${2}" + sudo systemctl stop "${SYSTEMD_NAME}" >& /dev/null || true + sudo systemctl disable "${SYSTEMD_NAME}" >& /dev/null || true + sudo rm "/etc/systemd/system/${SYSTEMD_NAME}" >& /dev/null || true +} + +clean_persistent_files(){ + sudo rm -r /var/${NAME} >& /dev/null || true + sudo rm -r /etc/sysctl.d/60-${NAME}-* >& /dev/null || true + _teardown_systemd ${MOUNTS_PATH1} mount + _teardown_systemd ${MOUNTS_PATH2} mount + _teardown_systemd ${MOUNTS_PATH3} mount + sudo systemctl daemon-reload +} + +_write_sysctl(){ + sudo /sbin/sysctl -w ${1}=${2} +} + +_write_ethtool(){ + local cur_val + cur_val="$(/sbin/ethtool -k ${1} | + grep "${2}:" | cut -d':' -f2 | cut -d' ' -f2)" + if [ "${cur_val}" != "${3}" ]; then + sudo /sbin/ethtool -K ${1} ${2} ${3} || true + fi +} + +init_default_state(){ + if [ "${1}" = 'make' ]; then + (cd ../../../; make) + fi + purge_containers + clean_persistent_files + # set sysctl original vals + _write_sysctl ${SYSCTL_KEY1} ${SYSCTL_VAL1_DEFAULT} + _write_sysctl ${SYSCTL_KEY2} ${SYSCTL_VAL2_DEFAULT} + _write_sysctl ${SYSCTL_KEY3} ${SYSCTL_VAL3_DEFAULT} + _write_sysctl ${SYSCTL_KEY4} ${SYSCTL_VAL4_DEFAULT} + # set ethtool original vals + _write_ethtool ${DEVICE} ${ETHTOOL_KEY2} ${ETHTOOL_VAL2_DEFAULT} + _write_ethtool ${DEVICE} ${ETHTOOL_KEY3} ${ETHTOOL_VAL3_DEFAULT} + _write_ethtool ${DEVICE} ${ETHTOOL_KEY4} ${ETHTOOL_VAL4_DEFAULT} + _write_ethtool ${DEVICE} ${ETHTOOL_KEY5} ${ETHTOOL_VAL5_DEFAULT} +} + +install(){ + purge_containers + helm install --name="${NAME}" --debug "../../../${NAME}" --namespace="${NAME}" "$@" +} + +upgrade(){ + helm upgrade --name="${NAME}" --debug "../../../${NAME}" --namespace="${NAME}" "$@" +} + +dry_run(){ + helm install --name="${NAME}" --dry-run --debug "../../../${NAME}" --namespace="${NAME}" "$@" +} + +get_container_status(){ + local deployment="${1}" + container="$(kubectl get pods --namespace="${NAME}" | grep ${NAME}-${deployment} | cut -d' ' -f1)" + local log_connect_timeout=30 + local log_connect_sleep_interval=2 + local wait_time=0 + while : ; do + kubectl logs "${container}" --namespace="${NAME}" > /dev/null && break || + echo "Waiting for container logs..." && + wait_time=$((${wait_time} + ${log_connect_sleep_interval})) && + sleep ${log_connect_sleep_interval} + if [ ${wait_time} -ge ${log_connect_timeout} ]; then + echo "Hit timeout while waiting for container logs to become available." + exit 1 + fi + done + local container_runtime_timeout=210 + local container_runtime_sleep_interval=5 + wait_time=0 + while : ; do + CLOGS="$(kubectl logs --namespace="${NAME}" "${container}" 2>&1)" + local status="$(echo "${CLOGS}" | tail -1)" + if [[ ${status} = *ERROR* ]] || [[ ${status} = *TRACE* ]]; then + if [ "${2}" = 'expect_failure' ]; then + echo 'Pod exited as expected' + break + else + echo 'Expected pod to complete successfully, but pod reported errors' + echo 'pod logs:' + echo "${CLOGS}" + exit 1 + fi + elif [ "${status}" = 'INFO Putting the daemon to sleep.' 
]; then + if [ "${2}" = 'expect_failure' ]; then + echo 'Expected pod to die with error, but pod completed successfully' + echo 'pod logs:' + echo "${CLOGS}" + exit 1 + else + echo 'Pod completed without errors.' + break + fi + else + wait_time=$((${wait_time} + ${container_runtime_sleep_interval})) + sleep ${container_runtime_sleep_interval} + fi + if [ ${wait_time} -ge ${container_runtime_timeout} ]; then + echo 'Hit timeout while waiting for container to complete work.' + break + fi + done +} + +_test_sysctl_default(){ + test "$(/sbin/sysctl "${1}" | cut -d'=' -f2 | tr -d '[:space:]')" = "${2}" +} + +_test_sysctl_value(){ + _test_sysctl_default "${1}" "${2}" + local key="${1//\//.}" + test "$(cat /etc/sysctl.d/60-${NAME}-${key}.conf)" = "${key}=${2}" +} + +_test_clog_msg(){ + [[ $CLOGS = *${1}* ]] || + (echo "Did not find expected string: '${1}'" + echo "in container logs:" + echo "${CLOGS}" + exit 1) +} + +alias install_base="install ${BASE_VALS}" +alias dry_run_base="dry_run ${BASE_VALS}" +shopt -s expand_aliases + +test_sysctl(){ + # Test the first set of values + local overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set1.yaml + local val1=0 + local val2=1 + local val3=0 + local val4=0 + echo "conf: + sysctl: + $SYSCTL_KEY1: $val1 + $SYSCTL_KEY2: $val2 + $SYSCTL_KEY3: $val3 + $SYSCTL_KEY4: $val4" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status sysctl + _test_sysctl_value $SYSCTL_KEY1 $val1 + _test_sysctl_value $SYSCTL_KEY2 $val2 + _test_sysctl_value $SYSCTL_KEY3 $val3 + _test_sysctl_value $SYSCTL_KEY4 $val4 + echo '[SUCCESS] sysctl test1 passed successfully' >> "${TEST_RESULTS}" + + # Test an updated set of values + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set2.yaml + val1=1 + val2=0 + val3=1 + val4=1 + echo "conf: + sysctl: + $SYSCTL_KEY1: $val1 + $SYSCTL_KEY2: $val2 + $SYSCTL_KEY3: $val3 + $SYSCTL_KEY4: $val4" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status sysctl + _test_sysctl_value $SYSCTL_KEY1 $val1 + _test_sysctl_value $SYSCTL_KEY2 $val2 + _test_sysctl_value $SYSCTL_KEY3 $val3 + _test_sysctl_value $SYSCTL_KEY4 $val4 + echo '[SUCCESS] sysctl test2 passed successfully' >> "${TEST_RESULTS}" + + # Test revert/rollback functionality + install_base + get_container_status sysctl + _test_sysctl_default $SYSCTL_KEY1 $SYSCTL_VAL1_DEFAULT + _test_sysctl_default $SYSCTL_KEY2 $SYSCTL_VAL2_DEFAULT + _test_sysctl_default $SYSCTL_KEY3 $SYSCTL_VAL3_DEFAULT + _test_sysctl_default $SYSCTL_KEY4 $SYSCTL_VAL4_DEFAULT + echo '[SUCCESS] sysctl test3 passed successfully' >> "${TEST_RESULTS}" + + # Test invalid key + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid1.yaml + echo "conf: + sysctl: + this.is.a.bogus.key: 1" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status sysctl expect_failure + _test_clog_msg 'sysctl: cannot stat /proc/sys/this/is/a/bogus/key: No such file or directory' + echo '[SUCCESS] sysctl test4 passed successfully' >> "${TEST_RESULTS}" + + # Test invalid val + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid2.yaml + echo "conf: + sysctl: + $SYSCTL_KEY1: bogus" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + # Sysctl does not report a non-zero exit code for this failure condition per + # https://bugzilla.redhat.com/show_bug.cgi?id=1264080 + get_container_status sysctl + _test_clog_msg 'sysctl: setting key "net.ipv4.conf.all.log_martians": Invalid argument' + echo '[SUCCESS] sysctl test5 passed successfully' >> "${TEST_RESULTS}" +} + 
+_test_if_mounted_positive(){ + mountpoint "${1}" || (echo "Expect ${1} to be mounted, but was not"; exit 1) + df -h | grep "${1}" | grep "${2}" || + (echo "Did not find expected mount size of ${2} in mount table"; exit 1) + __set_systemd_name "${1}" mount + systemctl is-enabled "${SYSTEMD_NAME}" || + (echo "Expect ${SYSTEMD_NAME} to be flagged to start on boot, but is not" + exit 1) +} + +_test_if_mounted_negative(){ + mountpoint "${1}" && + (echo "Expect ${1} not to be mounted, but was" + exit 1) || true + __set_systemd_name "${1}" mount + systemctl is-enabled "${SYSTEMD_NAME}" && + (echo "Expect ${SYSTEMD_NAME} not to be flagged to start on boot, but was" + exit 1) || true +} + +test_mounts(){ + # Test the first set of values + local overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set1.yaml + local mount_size=32M + echo "conf: + mounts: + mnt: + mnt_tgt: ${MOUNTS_PATH1} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + mnt2: + mnt_tgt: ${MOUNTS_PATH2} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + mnt3: + mnt_tgt: ${MOUNTS_PATH3} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + before: ntp.service + after: dbus.service" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status mounts + _test_if_mounted_positive ${MOUNTS_PATH1} ${mount_size} + _test_if_mounted_positive ${MOUNTS_PATH2} ${mount_size} + _test_if_mounted_positive ${MOUNTS_PATH3} ${mount_size} + echo '[SUCCESS] mounts test1 passed successfully' >> "${TEST_RESULTS}" + + # Test an updated set of values + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set2.yaml + mount_size=30M + echo "conf: + mounts: + mnt: + mnt_tgt: ${MOUNTS_PATH1} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + mnt2: + mnt_tgt: ${MOUNTS_PATH2} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + mnt3: + mnt_tgt: ${MOUNTS_PATH3} + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=${mount_size}' + before: ntp.service + after: dbus.service" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status mounts + _test_if_mounted_positive ${MOUNTS_PATH1} ${mount_size} + _test_if_mounted_positive ${MOUNTS_PATH2} ${mount_size} + _test_if_mounted_positive ${MOUNTS_PATH3} ${mount_size} + echo '[SUCCESS] mounts test2 passed successfully' >> "${TEST_RESULTS}" + + # Test revert/rollback functionality + install_base + get_container_status mounts + _test_if_mounted_negative ${MOUNTS_PATH1} + _test_if_mounted_negative ${MOUNTS_PATH2} + _test_if_mounted_negative ${MOUNTS_PATH3} + echo '[SUCCESS] mounts test3 passed successfully' >> "${TEST_RESULTS}" + + # Test invalid mount + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid1.yaml + echo "conf: + mounts: + mnt: + mnt_tgt: '${MOUNTS_PATH1}' + device: '/dev/bogus' + type: 'bogus' + options: 'defaults'" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status mounts expect_failure # systemd has long 3 min timeout + __set_systemd_name "${MOUNTS_PATH1}" mount + _test_clog_msg "${SYSTEMD_NAME} failed." 
+ echo '[SUCCESS] mounts test4 passed successfully' >> "${TEST_RESULTS}" +} + +_test_ethtool_value(){ + test "$(/sbin/ethtool -k ${DEVICE} | + grep "${1}:" | cut -d':' -f2 | tr -d '[:space:]')" = "${2}" +} + +test_ethtool(){ + # Test the first set of values + local overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set1.yaml + local val2=on + local val3=off + local val4=off + echo "conf: + ethtool: + ${DEVICE}: + $ETHTOOL_KEY2: $val2 + $ETHTOOL_KEY3: $val3 + $ETHTOOL_KEY4: $val4" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool + _test_ethtool_value $ETHTOOL_KEY2 $val2 + _test_ethtool_value $ETHTOOL_KEY3 $val3 + _test_ethtool_value $ETHTOOL_KEY4 $val4 + echo '[SUCCESS] ethtool test1 passed successfully' >> "${TEST_RESULTS}" + + # Test an updated set of values + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-set2.yaml + val2=off + val3=on + val4=on + echo "conf: + ethtool: + ${DEVICE}: + $ETHTOOL_KEY2: $val2 + $ETHTOOL_KEY3: $val3 + $ETHTOOL_KEY4: $val4" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool + _test_ethtool_value $ETHTOOL_KEY2 $val2 + _test_ethtool_value $ETHTOOL_KEY3 $val3 + _test_ethtool_value $ETHTOOL_KEY4 $val4 + echo '[SUCCESS] ethtool test2 passed successfully' >> "${TEST_RESULTS}" + + # Test revert/rollback functionality + install_base + get_container_status ethtool + _test_ethtool_value $ETHTOOL_KEY2 $ETHTOOL_VAL2_DEFAULT + _test_ethtool_value $ETHTOOL_KEY3 $ETHTOOL_VAL3_DEFAULT + _test_ethtool_value $ETHTOOL_KEY4 $ETHTOOL_VAL4_DEFAULT + echo '[SUCCESS] ethtool test3 passed successfully' >> "${TEST_RESULTS}" + + # Test invalid key + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid1.yaml + echo "conf: + ethtool: + ${DEVICE}: + this-is-a-bogus-key: $val2" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool expect_failure + _test_clog_msg "Could not find requested param this-is-a-bogus-key for ${DEVICE}" + echo '[SUCCESS] ethtool test4 passed successfully' >> "${TEST_RESULTS}" + + # Test invalid val + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid2.yaml + echo "conf: + ethtool: + ${DEVICE}: + $ETHTOOL_KEY2: bogus" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool expect_failure + _test_clog_msg "Expected 'on' or 'off', got 'bogus'" + echo '[SUCCESS] ethtool test5 passed successfully' >> "${TEST_RESULTS}" + + # Test fixed (unchangeable) ethtool param + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid3.yaml + echo "conf: + ethtool: + ${DEVICE}: + hw-tc-offload: on" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool expect_failure + _test_clog_msg "does not permit changing the 'hw-tc-offload' setting" + echo '[SUCCESS] ethtool test6 passed successfully' >> "${TEST_RESULTS}" + + # Test ethtool settings conflict + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-invalid4.yaml + echo "conf: + ethtool: + ${DEVICE}: + ${ETHTOOL_KEY2}: on + ${ETHTOOL_KEY5}: off" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status ethtool expect_failure + _test_clog_msg 'There is a conflict between settings chosen for this device.' 
+ echo '[SUCCESS] ethtool test7 passed successfully' >> "${TEST_RESULTS}" +} + +# test daemonset value overrides for hosts and labels +test_overrides(){ + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-dryrun.yaml + echo "conf: + sysctl: + net.ipv4.ip_forward: 1 + net.ipv6.conf.all.forwarding: 1 + overrides: + divingbell-sysctl: + labels: + - label: + key: compute_type + values: + - dpdk + - sriov + conf: + sysctl: + net.ipv4.ip_forward: 1 + - label: + key: another_label + values: + - another_value + conf: + sysctl: + net.ipv4.ip_forward: 1 + - label: + key: test_label + values: + - test_value + conf: + sysctl: + net.ipv4.ip_forward: 1 + hosts: + - name: superhost + conf: + sysctl: + net.ipv4.ip_forward: 0 + net.ipv6.conf.all.forwarding: 0 + - name: helm1 + conf: + sysctl: + net.ipv6.conf.all.forwarding: 0 + - name: specialhost + conf: + sysctl: + net.ipv6.conf.all.forwarding: 1 + divingbell-mounts: + labels: + - label: + key: blarg + values: + - soup + - chips + conf: + mounts: + mnt: + mnt_tgt: /mnt + device: tmpfs + type: tmpfs + options: 'defaults,noatime,nosuid,nodev,noexec,mode=1777,size=32M' + divingbell-ethtool: + hosts: + - name: ethtool-host + conf: + ethtool: + ens3: + hw-tc-offload: on + divingbell-bogus: + labels: + - label: + key: bogus + values: + - foo + - bar + conf: + bogus: + other_stuff: XYZ + - label: + key: bogus_label + values: + - bogus_value + conf: + bogus: + more_stuff: ABC + hosts: + - name: superhost2 + conf: + bogus: + other_stuff: FOO + more_stuff: BAR" > "${overrides_yaml}" + + tc_output="$(dry_run_base "--values=${overrides_yaml}")" + + # Compare against expected number of generated daemonsets + daemonset_count="$(echo "${tc_output}" | grep 'kind: DaemonSet' | wc -l)" + if [ "${daemonset_count}" != "11" ]; then + echo '[FAILURE] overrides test 1 failed' >> "${TEST_RESULTS}" + echo "Expected 11 daemonsets; got '${daemonset_count}'" >> "${TEST_RESULTS}" + exit 1 + else + echo '[SUCCESS] overrides test 1 passed successfully' >> "${TEST_RESULTS}" + fi + + # TODO: Implement more robust tests that do not depend on match expression + # ordering. 
+ + # Verify generated affinity for one of the daemonset labels + echo "${tc_output}" | grep ' spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: another_label + operator: In + values: + - "another_value" + - key: compute_type + operator: NotIn + values: + - "dpdk" + - "sriov" + - key: test_label + operator: NotIn + values: + - "test_value" + - key: kubernetes.io/hostname + operator: NotIn + values: + - "superhost" + - key: kubernetes.io/hostname + operator: NotIn + values: + - "helm1" + - key: kubernetes.io/hostname + operator: NotIn + values: + - "specialhost" + hostNetwork: true' && + echo '[SUCCESS] overrides test 2 passed successfully' >> "${TEST_RESULTS}" || + (echo '[FAILURE] overrides test 2 failed' && exit 1) + + # Verify generated affinity for one of the daemonset hosts + echo "${tc_output}" | grep ' spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: blarg + operator: In + values: + - "soup" + - "chips" + hostNetwork: true' && + echo '[SUCCESS] overrides test 3 passed successfully' >> "${TEST_RESULTS}" || + (echo '[FAILURE] overrides test 3 failed' && exit 1) + + # Verify generated affinity for one of the daemonset defaults + echo "${tc_output}" | grep ' spec: + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/hostname + operator: NotIn + values: + - "superhost" + - key: kubernetes.io/hostname + operator: NotIn + values: + - "helm1" + - key: kubernetes.io/hostname + operator: NotIn + values: + - "specialhost" + - key: compute_type + operator: NotIn + values: + - "dpdk" + - "sriov" + - key: another_label + operator: NotIn + values: + - "another_value" + - key: test_label + operator: NotIn + values: + - "test_value" + hostNetwork: true' && + echo '[SUCCESS] overrides test 4 passed successfully' >> "${TEST_RESULTS}" || + (echo '[FAILURE] overrides test 4 failed' && exit 1) + + overrides_yaml=${LOGS_SUBDIR}/${FUNCNAME}-functional.yaml + key1_override_val=0 + key2_non_override_val=0 + echo "conf: + sysctl: + $SYSCTL_KEY1: 1 + $SYSCTL_KEY2: $key2_non_override_val + overrides: + divingbell-sysctl: + hosts: + - name: $(hostname -f) + conf: + sysctl: + $SYSCTL_KEY1: $key1_override_val" > "${overrides_yaml}" + install_base "--values=${overrides_yaml}" + get_container_status sysctl + _test_sysctl_default $SYSCTL_KEY1 $key1_override_val + _test_sysctl_default $SYSCTL_KEY2 $key2_non_override_val + echo '[SUCCESS] overrides test 5 passed successfully' >> "${TEST_RESULTS}" + +} + +# initialization +init_default_state make + +# run tests +install_base +test_sysctl +test_mounts +test_ethtool +purge_containers +test_overrides + +# retore initial state +init_default_state + +echo "All tests pass for ${NAME}" + diff --git a/divingbell/values.yaml b/divingbell/values.yaml new file mode 100644 index 0000000..cbb28dc --- /dev/null +++ b/divingbell/values.yaml @@ -0,0 +1,26 @@ +# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default values for divingbell. +# This is a YAML-formatted file. +# Declare name/value pairs to be passed into your templates. +# name: value + +images: + divingbell: 'ubuntu:16.04' + pull_policy: IfNotPresent + +conf: + chroot_mnt_path: '/mnt' + log_colors: False
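For reference, any of the defaults above can be overridden at install time. The following is a usage sketch only (release name, namespace, and override file name are illustrative), following the same pattern used by divingbell/tools/gate/test.sh:

``` bash
# Hypothetical override file; any of the conf: values above can be replaced
cat > /tmp/divingbell-overrides.yaml <<'EOF'
conf:
  sysctl:
    net.ipv4.ip_forward: 1
EOF
helm install --name=divingbell --namespace=divingbell \
  --values=/tmp/divingbell-overrides.yaml ./divingbell
```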