Implement Prometheus metric integration
This implements Prometheus metric integration, including metric definition, collection, and export. End-user documentation for the supported metric data and the export interface is included. Change-Id: Ia0837f28073d6cd8e0220ac84cdd261b32704ae4
This commit is contained in:
parent
77deecc294
commit
0721ed43aa
11
README.rst
11
README.rst
|
@ -90,13 +90,16 @@ Which should output something like this::
|
||||||
For more information on how to install and use Armada, please reference:
|
For more information on how to install and use Armada, please reference:
|
||||||
`Armada Quickstart`_.
|
`Armada Quickstart`_.
|
||||||
|
|
||||||
|
|
||||||
Integration Points
|
Integration Points
|
||||||
------------------
|
------------------
|
||||||
|
|
||||||
Armada CLI component has the following integration points:
|
Armada CLI component has the following integration points:
|
||||||
|
|
||||||
* `Tiller`_ manages Armada chart installations.
|
* `Tiller`_ manages Armada chart installations.
|
||||||
* `Deckhand`_ supplies storage and management of site designs and secrets.
|
* `Deckhand`_ is one of the supported control document sources for Armada.
|
||||||
|
* A `Prometheus`_ exporter is provided for metric data related to application
|
||||||
|
of charts and collections of charts. See `metrics`_.
|
||||||
|
|
||||||
In addition, Armada's API component has the following integration points:
|
In addition, Armada's API component has the following integration points:
|
||||||
|
|
||||||
|
@ -110,10 +113,12 @@ Further Reading
|
||||||
|
|
||||||
.. _Manual Install Guide: https://airship-armada.readthedocs.io/en/latest/development/getting-started.html#developer-install-guide
|
.. _Manual Install Guide: https://airship-armada.readthedocs.io/en/latest/development/getting-started.html#developer-install-guide
|
||||||
.. _Armada Quickstart: https://airship-armada.readthedocs.io/en/latest/operations/guide-use-armada.html
|
.. _Armada Quickstart: https://airship-armada.readthedocs.io/en/latest/operations/guide-use-armada.html
|
||||||
|
.. _metrics: https://airship-armada.readthedocs.io/en/latest/operations/metrics.html#metrics
|
||||||
.. _kubectl: https://kubernetes.io/docs/user-guide/kubectl/kubectl_config/
|
.. _kubectl: https://kubernetes.io/docs/user-guide/kubectl/kubectl_config/
|
||||||
.. _Tiller: https://docs.helm.sh/using_helm/#easy-in-cluster-installation
|
.. _Tiller: https://docs.helm.sh/using_helm/#easy-in-cluster-installation
|
||||||
.. _Deckhand: https://opendev.org/airship/deckhand
|
.. _Deckhand: https://github.com/openstack/airship-deckhand
|
||||||
.. _Keystone: https://opendev.org/openstack/keystone
|
.. _Prometheus: https://prometheus.io
|
||||||
|
.. _Keystone: https://github.com/openstack/keystone
|
||||||
|
|
||||||
.. |Docker Repository on Quay| image:: https://quay.io/repository/airshipit/armada/status
|
.. |Docker Repository on Quay| image:: https://quay.io/repository/airshipit/armada/status
|
||||||
:target: https://quay.io/repository/airshipit/armada
|
:target: https://quay.io/repository/airshipit/armada
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
# Copyright 2019 The Armada Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
import falcon
|
||||||
|
import prometheus_client
|
||||||
|
|
||||||
|
from armada import api
|
||||||
|
from armada.handlers import metrics
|
||||||
|
|
||||||
|
|
||||||
|
class Metrics(api.BaseResource):
    '''Falcon resource that exposes the Armada metric registry so that it
    can be scraped by Prometheus.
    '''

    def on_get(self, req, resp):
        '''Serialize ``metrics.REGISTRY`` and write it to the response.

        The encoder and the response content type are both selected by
        ``prometheus_client`` from the request's ``Accept`` header.

        :param req: incoming request; only its ``Accept`` header is read
        :param resp: response that receives the content type, body and
            status
        :return: the result of ``self.return_error`` when encoding fails,
            otherwise ``None``
        '''
        accept_header = req.get_header('Accept')
        encode, negotiated_type = prometheus_client.exposition.choose_encoder(
            accept_header)

        try:
            payload = encode(metrics.REGISTRY)
        except Exception as ex:
            # Any encoding failure is logged with its traceback and
            # reported to the client as a server error.
            err_message = 'Failed to generate metric output'
            self.logger.error(err_message, exc_info=ex)
            return self.return_error(
                resp, falcon.HTTP_500, message=err_message)
        else:
            resp.content_type = negotiated_type
            resp.body = payload
            resp.status = falcon.HTTP_200
|
|
@ -27,6 +27,7 @@ from armada.api.controller.rollback import Rollback
|
||||||
from armada.api.controller.test import TestReleasesReleaseNameController
|
from armada.api.controller.test import TestReleasesReleaseNameController
|
||||||
from armada.api.controller.test import TestReleasesManifestController
|
from armada.api.controller.test import TestReleasesManifestController
|
||||||
from armada.api.controller.health import Health
|
from armada.api.controller.health import Health
|
||||||
|
from armada.api.controller.metrics import Metrics
|
||||||
from armada.api.controller.tiller import Release
|
from armada.api.controller.tiller import Release
|
||||||
from armada.api.controller.tiller import Status
|
from armada.api.controller.tiller import Status
|
||||||
from armada.api.controller.validation import Validate
|
from armada.api.controller.validation import Validate
|
||||||
|
@ -59,7 +60,7 @@ def create(enable_middleware=CONF.middleware):
|
||||||
logging.setup(CONF, 'armada')
|
logging.setup(CONF, 'armada')
|
||||||
|
|
||||||
# Configure API routing
|
# Configure API routing
|
||||||
url_routes_v1 = (
|
url_routes_v1 = [
|
||||||
(HEALTH_PATH, Health()),
|
(HEALTH_PATH, Health()),
|
||||||
('apply', Apply()),
|
('apply', Apply()),
|
||||||
('releases', Release()),
|
('releases', Release()),
|
||||||
|
@ -68,7 +69,8 @@ def create(enable_middleware=CONF.middleware):
|
||||||
('tests', TestReleasesManifestController()),
|
('tests', TestReleasesManifestController()),
|
||||||
('test/{release}', TestReleasesReleaseNameController()),
|
('test/{release}', TestReleasesReleaseNameController()),
|
||||||
('validatedesign', Validate()),
|
('validatedesign', Validate()),
|
||||||
)
|
('metrics', Metrics()),
|
||||||
|
]
|
||||||
|
|
||||||
for route, service in url_routes_v1:
|
for route, service in url_routes_v1:
|
||||||
api.add_route("/api/v1.0/{}".format(route), service)
|
api.add_route("/api/v1.0/{}".format(route), service)
|
||||||
|
|
|
@ -14,10 +14,12 @@
|
||||||
|
|
||||||
import click
|
import click
|
||||||
from oslo_config import cfg
|
from oslo_config import cfg
|
||||||
|
import prometheus_client
|
||||||
import yaml
|
import yaml
|
||||||
|
|
||||||
from armada.cli import CliAction
|
from armada.cli import CliAction
|
||||||
from armada.exceptions.source_exceptions import InvalidPathException
|
from armada.exceptions.source_exceptions import InvalidPathException
|
||||||
|
from armada.handlers import metrics
|
||||||
from armada.handlers.armada import Armada
|
from armada.handlers.armada import Armada
|
||||||
from armada.handlers.document import ReferenceResolver
|
from armada.handlers.document import ReferenceResolver
|
||||||
from armada.handlers.lock import lock_and_thread
|
from armada.handlers.lock import lock_and_thread
|
||||||
|
@ -81,6 +83,12 @@ SHORT_DESC = "Command installs manifest charts."
|
||||||
'--dry-run', help="Run charts without installing them.", is_flag=True)
|
'--dry-run', help="Run charts without installing them.", is_flag=True)
|
||||||
@click.option(
|
@click.option(
|
||||||
'--enable-chart-cleanup', help="Clean up unmanaged charts.", is_flag=True)
|
'--enable-chart-cleanup', help="Clean up unmanaged charts.", is_flag=True)
|
||||||
|
@click.option(
|
||||||
|
'--metrics-output',
|
||||||
|
help=(
|
||||||
|
"Output path for prometheus metric data, should end in .prom. By "
|
||||||
|
"default, no metric data is output."),
|
||||||
|
default=None)
|
||||||
@click.option(
|
@click.option(
|
||||||
'--use-doc-ref', help="Use armada manifest file reference.", is_flag=True)
|
'--use-doc-ref', help="Use armada manifest file reference.", is_flag=True)
|
||||||
@click.option(
|
@click.option(
|
||||||
|
@ -121,7 +129,7 @@ SHORT_DESC = "Command installs manifest charts."
|
||||||
'--wait',
|
'--wait',
|
||||||
help=(
|
help=(
|
||||||
"Force Tiller to wait until all charts are deployed, "
|
"Force Tiller to wait until all charts are deployed, "
|
||||||
"rather than using each chart's specified wait policy. "
|
"rather than using each charts specified wait policy. "
|
||||||
"This is equivalent to sequenced chartgroups."),
|
"This is equivalent to sequenced chartgroups."),
|
||||||
is_flag=True)
|
is_flag=True)
|
||||||
@click.option(
|
@click.option(
|
||||||
|
@ -135,22 +143,22 @@ SHORT_DESC = "Command installs manifest charts."
|
||||||
@click.pass_context
|
@click.pass_context
|
||||||
def apply_create(
|
def apply_create(
|
||||||
ctx, locations, api, disable_update_post, disable_update_pre, dry_run,
|
ctx, locations, api, disable_update_post, disable_update_pre, dry_run,
|
||||||
enable_chart_cleanup, use_doc_ref, set, tiller_host, tiller_port,
|
enable_chart_cleanup, metrics_output, use_doc_ref, set, tiller_host,
|
||||||
tiller_namespace, timeout, values, wait, target_manifest, bearer_token,
|
tiller_port, tiller_namespace, timeout, values, wait, target_manifest,
|
||||||
debug):
|
bearer_token, debug):
|
||||||
CONF.debug = debug
|
CONF.debug = debug
|
||||||
ApplyManifest(
|
ApplyManifest(
|
||||||
ctx, locations, api, disable_update_post, disable_update_pre, dry_run,
|
ctx, locations, api, disable_update_post, disable_update_pre, dry_run,
|
||||||
enable_chart_cleanup, use_doc_ref, set, tiller_host, tiller_port,
|
enable_chart_cleanup, metrics_output, use_doc_ref, set, tiller_host,
|
||||||
tiller_namespace, timeout, values, wait, target_manifest,
|
tiller_port, tiller_namespace, timeout, values, wait, target_manifest,
|
||||||
bearer_token).safe_invoke()
|
bearer_token).safe_invoke()
|
||||||
|
|
||||||
|
|
||||||
class ApplyManifest(CliAction):
|
class ApplyManifest(CliAction):
|
||||||
def __init__(
|
def __init__(
|
||||||
self, ctx, locations, api, disable_update_post, disable_update_pre,
|
self, ctx, locations, api, disable_update_post, disable_update_pre,
|
||||||
dry_run, enable_chart_cleanup, use_doc_ref, set, tiller_host,
|
dry_run, enable_chart_cleanup, metrics_output, use_doc_ref, set,
|
||||||
tiller_port, tiller_namespace, timeout, values, wait,
|
tiller_host, tiller_port, tiller_namespace, timeout, values, wait,
|
||||||
target_manifest, bearer_token):
|
target_manifest, bearer_token):
|
||||||
super(ApplyManifest, self).__init__()
|
super(ApplyManifest, self).__init__()
|
||||||
self.ctx = ctx
|
self.ctx = ctx
|
||||||
|
@ -161,6 +169,7 @@ class ApplyManifest(CliAction):
|
||||||
self.disable_update_pre = disable_update_pre
|
self.disable_update_pre = disable_update_pre
|
||||||
self.dry_run = dry_run
|
self.dry_run = dry_run
|
||||||
self.enable_chart_cleanup = enable_chart_cleanup
|
self.enable_chart_cleanup = enable_chart_cleanup
|
||||||
|
self.metrics_output = metrics_output
|
||||||
self.use_doc_ref = use_doc_ref
|
self.use_doc_ref = use_doc_ref
|
||||||
self.set = set
|
self.set = set
|
||||||
self.tiller_host = tiller_host
|
self.tiller_host = tiller_host
|
||||||
|
@ -210,8 +219,16 @@ class ApplyManifest(CliAction):
|
||||||
bearer_token=self.bearer_token,
|
bearer_token=self.bearer_token,
|
||||||
dry_run=self.dry_run) as tiller:
|
dry_run=self.dry_run) as tiller:
|
||||||
|
|
||||||
resp = self.handle(documents, tiller)
|
try:
|
||||||
self.output(resp)
|
resp = self.handle(documents, tiller)
|
||||||
|
self.output(resp)
|
||||||
|
finally:
|
||||||
|
if self.metrics_output:
|
||||||
|
path = self.metrics_output
|
||||||
|
self.logger.info(
|
||||||
|
'Storing metrics output in path: {}'.format(path))
|
||||||
|
prometheus_client.write_to_textfile(
|
||||||
|
path, metrics.REGISTRY)
|
||||||
else:
|
else:
|
||||||
if len(self.values) > 0:
|
if len(self.values) > 0:
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
|
|
|
@ -24,6 +24,7 @@ from armada.exceptions import override_exceptions
|
||||||
from armada.exceptions import source_exceptions
|
from armada.exceptions import source_exceptions
|
||||||
from armada.exceptions import tiller_exceptions
|
from armada.exceptions import tiller_exceptions
|
||||||
from armada.exceptions import validate_exceptions
|
from armada.exceptions import validate_exceptions
|
||||||
|
from armada.handlers import metrics
|
||||||
from armada.handlers.chart_deploy import ChartDeploy
|
from armada.handlers.chart_deploy import ChartDeploy
|
||||||
from armada.handlers.manifest import Manifest
|
from armada.handlers.manifest import Manifest
|
||||||
from armada.handlers.override import Override
|
from armada.handlers.override import Override
|
||||||
|
@ -92,8 +93,9 @@ class Armada(object):
|
||||||
self.documents, target_manifest=target_manifest).get_manifest()
|
self.documents, target_manifest=target_manifest).get_manifest()
|
||||||
self.chart_cache = {}
|
self.chart_cache = {}
|
||||||
self.chart_deploy = ChartDeploy(
|
self.chart_deploy = ChartDeploy(
|
||||||
disable_update_pre, disable_update_post, self.dry_run,
|
self.manifest, disable_update_pre, disable_update_post,
|
||||||
k8s_wait_attempts, k8s_wait_attempt_sleep, timeout, self.tiller)
|
self.dry_run, k8s_wait_attempts, k8s_wait_attempt_sleep, timeout,
|
||||||
|
self.tiller)
|
||||||
|
|
||||||
def pre_flight_ops(self):
|
def pre_flight_ops(self):
|
||||||
"""Perform a series of checks and operations to ensure proper
|
"""Perform a series of checks and operations to ensure proper
|
||||||
|
@ -113,6 +115,12 @@ class Armada(object):
|
||||||
self.get_chart(ch)
|
self.get_chart(ch)
|
||||||
|
|
||||||
def get_chart(self, ch):
|
def get_chart(self, ch):
|
||||||
|
manifest_name = self.manifest['metadata']['name']
|
||||||
|
chart_name = ch['metadata']['name']
|
||||||
|
with metrics.CHART_DOWNLOAD.get_context(manifest_name, chart_name):
|
||||||
|
return self._get_chart(ch)
|
||||||
|
|
||||||
|
def _get_chart(self, ch):
|
||||||
chart = ch.get(const.KEYWORD_DATA)
|
chart = ch.get(const.KEYWORD_DATA)
|
||||||
chart_source = chart.get('source', {})
|
chart_source = chart.get('source', {})
|
||||||
location = chart_source.get('location')
|
location = chart_source.get('location')
|
||||||
|
@ -171,6 +179,11 @@ class Armada(object):
|
||||||
'''
|
'''
|
||||||
Synchronize Helm with the Armada Config(s)
|
Synchronize Helm with the Armada Config(s)
|
||||||
'''
|
'''
|
||||||
|
manifest_name = self.manifest['metadata']['name']
|
||||||
|
with metrics.APPLY.get_context(manifest_name):
|
||||||
|
return self._sync()
|
||||||
|
|
||||||
|
def _sync(self):
|
||||||
if self.dry_run:
|
if self.dry_run:
|
||||||
LOG.info('Armada is in DRY RUN mode, no changes being made.')
|
LOG.info('Armada is in DRY RUN mode, no changes being made.')
|
||||||
|
|
||||||
|
@ -207,11 +220,12 @@ class Armada(object):
|
||||||
|
|
||||||
cg_charts = chartgroup.get(const.KEYWORD_CHARTS, [])
|
cg_charts = chartgroup.get(const.KEYWORD_CHARTS, [])
|
||||||
|
|
||||||
def deploy_chart(chart):
|
def deploy_chart(chart, concurrency):
|
||||||
set_current_chart(chart)
|
set_current_chart(chart)
|
||||||
try:
|
try:
|
||||||
return self.chart_deploy.execute(
|
return self.chart_deploy.execute(
|
||||||
chart, cg_test_all_charts, prefix, known_releases)
|
chart, cg_test_all_charts, prefix, known_releases,
|
||||||
|
concurrency)
|
||||||
finally:
|
finally:
|
||||||
set_current_chart(None)
|
set_current_chart(None)
|
||||||
|
|
||||||
|
@ -233,13 +247,14 @@ class Armada(object):
|
||||||
|
|
||||||
if cg_sequenced:
|
if cg_sequenced:
|
||||||
for chart in cg_charts:
|
for chart in cg_charts:
|
||||||
if (handle_result(chart, lambda: deploy_chart(chart))):
|
if (handle_result(chart, lambda: deploy_chart(chart, 1))):
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
with ThreadPoolExecutor(
|
with ThreadPoolExecutor(
|
||||||
max_workers=len(cg_charts)) as executor:
|
max_workers=len(cg_charts)) as executor:
|
||||||
future_to_chart = {
|
future_to_chart = {
|
||||||
executor.submit(deploy_chart, chart): chart
|
executor.submit(deploy_chart, chart, len(cg_charts)):
|
||||||
|
chart
|
||||||
for chart in cg_charts
|
for chart in cg_charts
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -19,6 +19,7 @@ import yaml
|
||||||
|
|
||||||
from armada import const
|
from armada import const
|
||||||
from armada.exceptions import armada_exceptions
|
from armada.exceptions import armada_exceptions
|
||||||
|
from armada.handlers import metrics
|
||||||
from armada.handlers.chartbuilder import ChartBuilder
|
from armada.handlers.chartbuilder import ChartBuilder
|
||||||
from armada.handlers.release_diff import ReleaseDiff
|
from armada.handlers.release_diff import ReleaseDiff
|
||||||
from armada.handlers.chart_delete import ChartDelete
|
from armada.handlers.chart_delete import ChartDelete
|
||||||
|
@ -33,8 +34,9 @@ LOG = logging.getLogger(__name__)
|
||||||
|
|
||||||
class ChartDeploy(object):
|
class ChartDeploy(object):
|
||||||
def __init__(
|
def __init__(
|
||||||
self, disable_update_pre, disable_update_post, dry_run,
|
self, manifest, disable_update_pre, disable_update_post, dry_run,
|
||||||
k8s_wait_attempts, k8s_wait_attempt_sleep, timeout, tiller):
|
k8s_wait_attempts, k8s_wait_attempt_sleep, timeout, tiller):
|
||||||
|
self.manifest = manifest
|
||||||
self.disable_update_pre = disable_update_pre
|
self.disable_update_pre = disable_update_pre
|
||||||
self.disable_update_post = disable_update_post
|
self.disable_update_post = disable_update_post
|
||||||
self.dry_run = dry_run
|
self.dry_run = dry_run
|
||||||
|
@ -43,25 +45,26 @@ class ChartDeploy(object):
|
||||||
self.timeout = timeout
|
self.timeout = timeout
|
||||||
self.tiller = tiller
|
self.tiller = tiller
|
||||||
|
|
||||||
def execute(self, ch, cg_test_all_charts, prefix, known_releases):
|
def execute(
|
||||||
|
self, ch, cg_test_all_charts, prefix, known_releases, concurrency):
|
||||||
|
chart_name = ch['metadata']['name']
|
||||||
|
manifest_name = self.manifest['metadata']['name']
|
||||||
|
with metrics.CHART_HANDLE.get_context(concurrency, manifest_name,
|
||||||
|
chart_name):
|
||||||
|
return self._execute(
|
||||||
|
ch, cg_test_all_charts, prefix, known_releases)
|
||||||
|
|
||||||
|
def _execute(self, ch, cg_test_all_charts, prefix, known_releases):
|
||||||
|
manifest_name = self.manifest['metadata']['name']
|
||||||
chart = ch[const.KEYWORD_DATA]
|
chart = ch[const.KEYWORD_DATA]
|
||||||
|
chart_name = ch['metadata']['name']
|
||||||
namespace = chart.get('namespace')
|
namespace = chart.get('namespace')
|
||||||
release = chart.get('release')
|
release = chart.get('release')
|
||||||
release_name = r.release_prefixer(prefix, release)
|
release_name = r.release_prefixer(prefix, release)
|
||||||
LOG.info('Processing Chart, release=%s', release_name)
|
LOG.info('Processing Chart, release=%s', release_name)
|
||||||
|
|
||||||
values = chart.get('values', {})
|
|
||||||
pre_actions = {}
|
|
||||||
post_actions = {}
|
|
||||||
|
|
||||||
result = {}
|
result = {}
|
||||||
|
|
||||||
old_release = self.find_chart_release(known_releases, release_name)
|
|
||||||
|
|
||||||
status = None
|
|
||||||
if old_release:
|
|
||||||
status = r.get_release_status(old_release)
|
|
||||||
|
|
||||||
chart_wait = ChartWait(
|
chart_wait = ChartWait(
|
||||||
self.tiller.k8s,
|
self.tiller.k8s,
|
||||||
release_name,
|
release_name,
|
||||||
|
@ -70,18 +73,32 @@ class ChartDeploy(object):
|
||||||
k8s_wait_attempts=self.k8s_wait_attempts,
|
k8s_wait_attempts=self.k8s_wait_attempts,
|
||||||
k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
|
k8s_wait_attempt_sleep=self.k8s_wait_attempt_sleep,
|
||||||
timeout=self.timeout)
|
timeout=self.timeout)
|
||||||
|
wait_timeout = chart_wait.get_timeout()
|
||||||
native_wait_enabled = chart_wait.is_native_enabled()
|
|
||||||
|
|
||||||
# Begin Chart timeout deadline
|
# Begin Chart timeout deadline
|
||||||
deadline = time.time() + chart_wait.get_timeout()
|
deadline = time.time() + wait_timeout
|
||||||
|
old_release = self.find_chart_release(known_releases, release_name)
|
||||||
|
action = metrics.ChartDeployAction.NOOP
|
||||||
|
|
||||||
|
def noop():
|
||||||
|
pass
|
||||||
|
|
||||||
|
deploy = noop
|
||||||
|
|
||||||
|
# Resolve action
|
||||||
|
values = chart.get('values', {})
|
||||||
|
pre_actions = {}
|
||||||
|
post_actions = {}
|
||||||
|
|
||||||
|
status = None
|
||||||
|
if old_release:
|
||||||
|
status = r.get_release_status(old_release)
|
||||||
|
|
||||||
|
native_wait_enabled = chart_wait.is_native_enabled()
|
||||||
|
|
||||||
chartbuilder = ChartBuilder(ch)
|
chartbuilder = ChartBuilder(ch)
|
||||||
new_chart = chartbuilder.get_helm_chart()
|
new_chart = chartbuilder.get_helm_chart()
|
||||||
|
|
||||||
# TODO(mark-burnett): It may be more robust to directly call
|
|
||||||
# tiller status to decide whether to install/upgrade rather
|
|
||||||
# than checking for list membership.
|
|
||||||
if status == const.STATUS_DEPLOYED:
|
if status == const.STATUS_DEPLOYED:
|
||||||
|
|
||||||
# indicate to the end user what path we are taking
|
# indicate to the end user what path we are taking
|
||||||
|
@ -135,36 +152,37 @@ class ChartDeploy(object):
|
||||||
if not diff:
|
if not diff:
|
||||||
LOG.info("Found no updates to chart release inputs")
|
LOG.info("Found no updates to chart release inputs")
|
||||||
else:
|
else:
|
||||||
|
action = metrics.ChartDeployAction.UPGRADE
|
||||||
LOG.info("Found updates to chart release inputs")
|
LOG.info("Found updates to chart release inputs")
|
||||||
LOG.debug("%s", diff)
|
LOG.debug("%s", diff)
|
||||||
result['diff'] = {chart['release']: str(diff)}
|
result['diff'] = {chart['release']: str(diff)}
|
||||||
|
|
||||||
# TODO(MarshM): Add tiller dry-run before upgrade and
|
def upgrade():
|
||||||
# consider deadline impacts
|
# do actual update
|
||||||
|
timer = int(round(deadline - time.time()))
|
||||||
|
LOG.info(
|
||||||
|
"Upgrading release %s in namespace %s, wait=%s, "
|
||||||
|
"timeout=%ss", release_name, namespace,
|
||||||
|
native_wait_enabled, timer)
|
||||||
|
tiller_result = self.tiller.update_release(
|
||||||
|
new_chart,
|
||||||
|
release_name,
|
||||||
|
namespace,
|
||||||
|
pre_actions=pre_actions,
|
||||||
|
post_actions=post_actions,
|
||||||
|
disable_hooks=disable_hooks,
|
||||||
|
values=yaml.safe_dump(values),
|
||||||
|
wait=native_wait_enabled,
|
||||||
|
timeout=timer,
|
||||||
|
force=force,
|
||||||
|
recreate_pods=recreate_pods)
|
||||||
|
|
||||||
# do actual update
|
LOG.info(
|
||||||
timer = int(round(deadline - time.time()))
|
'Upgrade completed with results from Tiller: %s',
|
||||||
LOG.info(
|
tiller_result.__dict__)
|
||||||
"Upgrading release %s in namespace %s, wait=%s, "
|
result['upgrade'] = release_name
|
||||||
"timeout=%ss", release_name, namespace,
|
|
||||||
native_wait_enabled, timer)
|
|
||||||
tiller_result = self.tiller.update_release(
|
|
||||||
new_chart,
|
|
||||||
release_name,
|
|
||||||
namespace,
|
|
||||||
pre_actions=pre_actions,
|
|
||||||
post_actions=post_actions,
|
|
||||||
disable_hooks=disable_hooks,
|
|
||||||
values=yaml.safe_dump(values),
|
|
||||||
wait=native_wait_enabled,
|
|
||||||
timeout=timer,
|
|
||||||
force=force,
|
|
||||||
recreate_pods=recreate_pods)
|
|
||||||
|
|
||||||
LOG.info(
|
deploy = upgrade
|
||||||
'Upgrade completed with results from Tiller: %s',
|
|
||||||
tiller_result.__dict__)
|
|
||||||
result['upgrade'] = release_name
|
|
||||||
else:
|
else:
|
||||||
# Check for release with status other than DEPLOYED
|
# Check for release with status other than DEPLOYED
|
||||||
if status:
|
if status:
|
||||||
|
@ -178,7 +196,6 @@ class ChartDeploy(object):
|
||||||
# was started within the timeout window of the chart.
|
# was started within the timeout window of the chart.
|
||||||
last_deployment_age = r.get_last_deployment_age(
|
last_deployment_age = r.get_last_deployment_age(
|
||||||
old_release)
|
old_release)
|
||||||
wait_timeout = chart_wait.get_timeout()
|
|
||||||
likely_pending = last_deployment_age <= wait_timeout
|
likely_pending = last_deployment_age <= wait_timeout
|
||||||
if likely_pending:
|
if likely_pending:
|
||||||
# Give up if a deployment is likely pending, we do not
|
# Give up if a deployment is likely pending, we do not
|
||||||
|
@ -217,35 +234,49 @@ class ChartDeploy(object):
|
||||||
release_name, status)
|
release_name, status)
|
||||||
else:
|
else:
|
||||||
# Purge the release
|
# Purge the release
|
||||||
LOG.info(
|
with metrics.CHART_DELETE.get_context(manifest_name,
|
||||||
'Purging release %s with status %s', release_name,
|
chart_name):
|
||||||
status)
|
|
||||||
chart_delete = ChartDelete(
|
|
||||||
chart, release_name, self.tiller)
|
|
||||||
chart_delete.delete()
|
|
||||||
result['purge'] = release_name
|
|
||||||
|
|
||||||
|
LOG.info(
|
||||||
|
'Purging release %s with status %s', release_name,
|
||||||
|
status)
|
||||||
|
chart_delete = ChartDelete(
|
||||||
|
chart, release_name, self.tiller)
|
||||||
|
chart_delete.delete()
|
||||||
|
result['purge'] = release_name
|
||||||
|
|
||||||
|
action = metrics.ChartDeployAction.INSTALL
|
||||||
|
|
||||||
|
def install():
|
||||||
|
timer = int(round(deadline - time.time()))
|
||||||
|
LOG.info(
|
||||||
|
"Installing release %s in namespace %s, wait=%s, "
|
||||||
|
"timeout=%ss", release_name, namespace,
|
||||||
|
native_wait_enabled, timer)
|
||||||
|
tiller_result = self.tiller.install_release(
|
||||||
|
new_chart,
|
||||||
|
release_name,
|
||||||
|
namespace,
|
||||||
|
values=yaml.safe_dump(values),
|
||||||
|
wait=native_wait_enabled,
|
||||||
|
timeout=timer)
|
||||||
|
|
||||||
|
LOG.info(
|
||||||
|
'Install completed with results from Tiller: %s',
|
||||||
|
tiller_result.__dict__)
|
||||||
|
result['install'] = release_name
|
||||||
|
|
||||||
|
deploy = install
|
||||||
|
|
||||||
|
# Deploy
|
||||||
|
with metrics.CHART_DEPLOY.get_context(wait_timeout, manifest_name,
|
||||||
|
chart_name,
|
||||||
|
action.get_label_value()):
|
||||||
|
deploy()
|
||||||
|
|
||||||
|
# Wait
|
||||||
timer = int(round(deadline - time.time()))
|
timer = int(round(deadline - time.time()))
|
||||||
LOG.info(
|
chart_wait.wait(timer)
|
||||||
"Installing release %s in namespace %s, wait=%s, "
|
|
||||||
"timeout=%ss", release_name, namespace, native_wait_enabled,
|
|
||||||
timer)
|
|
||||||
tiller_result = self.tiller.install_release(
|
|
||||||
new_chart,
|
|
||||||
release_name,
|
|
||||||
namespace,
|
|
||||||
values=yaml.safe_dump(values),
|
|
||||||
wait=native_wait_enabled,
|
|
||||||
timeout=timer)
|
|
||||||
|
|
||||||
LOG.info(
|
|
||||||
'Install completed with results from Tiller: %s',
|
|
||||||
tiller_result.__dict__)
|
|
||||||
result['install'] = release_name
|
|
||||||
|
|
||||||
# Wait
|
|
||||||
timer = int(round(deadline - time.time()))
|
|
||||||
chart_wait.wait(timer)
|
|
||||||
|
|
||||||
# Test
|
# Test
|
||||||
just_deployed = ('install' in result) or ('upgrade' in result)
|
just_deployed = ('install' in result) or ('upgrade' in result)
|
||||||
|
@ -260,7 +291,9 @@ class ChartDeploy(object):
|
||||||
run_test = test_handler.test_enabled and (
|
run_test = test_handler.test_enabled and (
|
||||||
just_deployed or not last_test_passed)
|
just_deployed or not last_test_passed)
|
||||||
if run_test:
|
if run_test:
|
||||||
self._test_chart(release_name, test_handler)
|
with metrics.CHART_TEST.get_context(test_handler.timeout,
|
||||||
|
manifest_name, chart_name):
|
||||||
|
self._test_chart(release_name, test_handler)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,175 @@
|
||||||
|
# Copyright 2019 The Armada Authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from contextlib import ExitStack
|
||||||
|
from enum import Enum
|
||||||
|
import os
|
||||||
|
|
||||||
|
import prometheus_client
|
||||||
|
from prometheus_client import multiprocess, values, context_managers
|
||||||
|
|
||||||
|
|
||||||
|
class ActionMetrics():
    """ Group of metrics describing one kind of action: an in-progress
    gauge, an attempt counter, a failure counter and a duration histogram,
    all registered against ``REGISTRY``.
    """

    _PREFIX = 'armada'

    def __init__(self, prefix, description, labels):
        """
        :param prefix: action specific part of each metric name; it is
            appended to the class level ``_PREFIX``
        :param description: wording for the action, embedded in the
            description of each metric
        :param labels: label names declared on every metric
        """
        self.full_prefix = '{}_{}'.format(self.__class__._PREFIX, prefix)

        # Metrics are created in the same order as they are documented
        # above: gauge, attempts, failures, duration.
        inprogress_name = '{}_attempt_inprogress'.format(self.full_prefix)
        inprogress_doc = 'In progress attempts to {}'.format(description)
        self.progress = prometheus_client.Gauge(
            inprogress_name,
            inprogress_doc,
            labels,
            registry=REGISTRY,
            multiprocess_mode='livesum')

        attempt_name = '{}_attempt_total'.format(self.full_prefix)
        attempt_doc = 'Total attempts to {}'.format(description)
        self.attempt_total = prometheus_client.Counter(
            attempt_name, attempt_doc, labels, registry=REGISTRY)

        failure_name = '{}_failure_total'.format(self.full_prefix)
        failure_doc = 'Total failures to {}'.format(description)
        self.failure_total = prometheus_client.Counter(
            failure_name, failure_doc, labels, registry=REGISTRY)

        duration_name = '{}_duration_seconds'.format(self.full_prefix)
        duration_doc = 'Seconds to {}'.format(description)
        self.duration = prometheus_client.Histogram(
            duration_name, duration_doc, labels, registry=REGISTRY)

    def get_context(self, *args, **kwargs):
        """ Build the context manager that observes one attempt of the
        action. All positional and keyword arguments are forwarded as
        metric label values.

        The attempt counter is incremented, and the in-progress, failure
        and timing trackers are entered, as part of this call; leaving the
        returned context unwinds the trackers.

        :return: a context manager for the action which observes the
            desired metrics.
        :rtype: contextlib.ExitStack
        """
        in_flight = self.progress.labels(*args, **kwargs)
        attempts = self.attempt_total.labels(*args, **kwargs)
        attempts.inc()
        failures = self.failure_total.labels(*args, **kwargs)
        timing = self.duration.labels(*args, **kwargs)

        stack = ExitStack()
        trackers = (
            in_flight.track_inprogress(),
            failures.count_exceptions(),
            timing.time(),
        )
        for tracker in trackers:
            stack.enter_context(tracker)
        return stack
|
||||||
|
|
||||||
|
|
||||||
|
class ChartHandleMetrics(ActionMetrics):
    """ Action metrics extended with a histogram of the concurrency value
    supplied for each handled chart.
    """

    def __init__(self, prefix, description, labels):
        """
        :param prefix: prefix to use for each metric name
        :param description: description of action to use in metric
            description
        :param labels: label names to define for each metric
        """
        super().__init__(prefix, description, labels)
        concurrency_name = '{}_concurrency_count'.format(self.full_prefix)
        self.concurrency = prometheus_client.Histogram(
            concurrency_name,
            'Count of charts being handled concurrently for chart',
            labels,
            registry=REGISTRY)

    def get_context(self, concurrency_value, *args, **kwargs):
        """ Record the concurrency observation, then hand back the base
        action context.

        :param concurrency_value: value observed on the concurrency
            histogram
        :return: the context manager produced by the parent class for the
            same label values
        """
        labelled = self.concurrency.labels(*args, **kwargs)
        labelled.observe(concurrency_value)
        return super().get_context(*args, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
class ActionWithTimeoutMetrics(ActionMetrics):
    """Action metrics for actions that run under a configured timeout.

    Adds two histograms to the base metrics: the configured timeout itself
    and the ratio of the measured duration to that timeout.
    """

    def __init__(self, prefix, description, labels):
        """Register the base action metrics plus the two timeout histograms.

        :param prefix: passed through to ``ActionMetrics``
        :param description: passed through to ``ActionMetrics``; also used
            in the help text of the timeout metrics
        :param labels: label names shared by every metric of this action
        """
        super().__init__(prefix, description, labels)

        timeout_name = '{}_timeout_duration_seconds'.format(self.full_prefix)
        timeout_help = 'Configured timeout (in seconds) to {}'.format(
            description)
        self.timeout = prometheus_client.Histogram(
            timeout_name, timeout_help, labels, registry=REGISTRY)

        usage_name = '{}_timeout_usage_ratio'.format(self.full_prefix)
        usage_help = 'Ratio of duration to timeout to {}'.format(description)
        self.timeout_usage = prometheus_client.Histogram(
            usage_name, usage_help, labels, registry=REGISTRY)

    def get_context(self, timeout_value, *args, **kwargs):
        """ Any extra args are used as metric label values.

        :param timeout_value: the configured timeout; observed immediately
            and used as the divisor for the usage ratio
        :return: the parent's context with an additional timer entered on it
        """
        timeout_metric = self.timeout.labels(*args, **kwargs)
        usage_metric = self.timeout_usage.labels(*args, **kwargs)

        timeout_metric.observe(timeout_value)

        def observe_timeout_usage(duration):
            # A falsy timeout (e.g. 0) would divide by zero, so no ratio is
            # recorded in that case.
            if not timeout_value:
                return
            usage_metric.observe(duration / timeout_value)

        # NOTE(review): ``context_managers.Timer`` presumably invokes the
        # callback with the elapsed time on exit; confirm in that module.
        timer = context_managers.Timer(observe_timeout_usage)
        stack = super().get_context(*args, **kwargs)
        stack.enter_context(timer)
        return stack
|
||||||
|
|
||||||
|
|
||||||
|
class ChartDeployAction(Enum):
    """ Sub-actions of the chart deploy action. Each member maps to a metric
    label value via :meth:`get_label_value`.
    """

    INSTALL = 1
    UPGRADE = 2
    NOOP = 3

    def get_label_value(self):
        """
        :return: the member name in lower case, for use as a label value
        :rtype: str
        """
        member_name = self.name
        return member_name.lower()
|
||||||
|
|
||||||
|
|
||||||
|
# Registry that every metric in this module is registered against.
REGISTRY = prometheus_client.CollectorRegistry()

if "prometheus_multiproc_dir" in os.environ:
    # For why this is needed see:
    # https://github.com/prometheus/client_python/issues/275#issuecomment-504755024
    import uwsgi

    prometheus_client.values.ValueClass = values.MultiProcessValue(
        uwsgi.worker_id)

    # Attach the multiprocess collector to the registry.
    multiprocess.MultiProcessCollector(REGISTRY)

APPLY = ActionMetrics(
    'apply',
    'apply a manifest',
    ['manifest'],
)

# TODO: Ideally include an action (ChartDeployAction) label, but that's not
# determined until after chart handling starts.
CHART_HANDLE = ChartHandleMetrics(
    'chart_handle',
    'handle a chart (including delete, deploy, test (all as necessary) but '
    'not download)',
    ['manifest', 'chart'],
)

CHART_DOWNLOAD = ActionMetrics(
    'chart_download',
    'download a chart (will be noop if previously cached)',
    ['manifest', 'chart'],
)

CHART_DELETE = ActionMetrics(
    'chart_delete',
    'delete a chart',
    ['manifest', 'chart'],
)

# Deploy is the only action that carries the extra 'action' label.
CHART_DEPLOY = ActionWithTimeoutMetrics(
    'chart_deploy',
    'deploy a chart (including install/upgrade and wait (all as necessary))',
    ['manifest', 'chart', 'action'],
)

CHART_TEST = ActionWithTimeoutMetrics(
    'chart_test',
    'test a chart',
    ['manifest', 'chart'],
)
|
|
@ -397,6 +397,7 @@ class ArmadaHandlerTestCase(base.ArmadaTestCase):
|
||||||
mock_test_release.side_effect = fail
|
mock_test_release.side_effect = fail
|
||||||
else:
|
else:
|
||||||
mock_test_release.return_value = test_success
|
mock_test_release.return_value = test_success
|
||||||
|
mock_test.return_value.timeout = const.DEFAULT_TEST_TIMEOUT
|
||||||
|
|
||||||
# Stub out irrelevant methods called by `armada.sync()`.
|
# Stub out irrelevant methods called by `armada.sync()`.
|
||||||
mock_chartbuilder.get_source_path.return_value = None
|
mock_chartbuilder.get_source_path.return_value = None
|
||||||
|
|
|
@ -18,6 +18,7 @@ limitations under the License.
|
||||||
{{- $envAll := . }}
|
{{- $envAll := . }}
|
||||||
{{- $mounts_armada_api := .Values.pod.mounts.armada_api.armada_api }}
|
{{- $mounts_armada_api := .Values.pod.mounts.armada_api.armada_api }}
|
||||||
{{- $mounts_armada_api_init := .Values.pod.mounts.armada_api.init_container }}
|
{{- $mounts_armada_api_init := .Values.pod.mounts.armada_api.init_container }}
|
||||||
|
{{- $prometheus_annotations := $envAll.Values.monitoring.prometheus.armada }}
|
||||||
{{- $serviceAccountName := "armada-api" }}
|
{{- $serviceAccountName := "armada-api" }}
|
||||||
{{ tuple $envAll "api" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
|
{{ tuple $envAll "api" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }}
|
||||||
---
|
---
|
||||||
|
@ -79,9 +80,9 @@ spec:
|
||||||
labels:
|
labels:
|
||||||
{{ tuple $envAll "armada" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
|
{{ tuple $envAll "armada" "api" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 8 }}
|
||||||
annotations:
|
annotations:
|
||||||
{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" | indent 8 }}
|
|
||||||
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
|
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }}
|
||||||
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
|
configmap-etc-hash: {{ tuple "configmap-etc.yaml" . | include "helm-toolkit.utils.hash" }}
|
||||||
|
{{ tuple $prometheus_annotations | include "helm-toolkit.snippets.prometheus_pod_annotations" | indent 8 }}
|
||||||
spec:
|
spec:
|
||||||
{{ dict "envAll" $envAll "application" "armada" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
|
{{ dict "envAll" $envAll "application" "armada" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 6 }}
|
||||||
serviceAccountName: {{ $serviceAccountName }}
|
serviceAccountName: {{ $serviceAccountName }}
|
||||||
|
@ -123,6 +124,8 @@ spec:
|
||||||
volumeMounts:
|
volumeMounts:
|
||||||
- name: pod-tmp
|
- name: pod-tmp
|
||||||
mountPath: /tmp
|
mountPath: /tmp
|
||||||
|
- name: pod-tmp-metrics
|
||||||
|
mountPath: /tmp/armada/metrics
|
||||||
- name: pod-etc-armada
|
- name: pod-etc-armada
|
||||||
mountPath: /etc/armada
|
mountPath: /etc/armada
|
||||||
- name: armada-etc
|
- name: armada-etc
|
||||||
|
@ -193,6 +196,9 @@ spec:
|
||||||
volumes:
|
volumes:
|
||||||
- name: pod-tmp
|
- name: pod-tmp
|
||||||
emptyDir: {}
|
emptyDir: {}
|
||||||
|
- name: pod-tmp-metrics
|
||||||
|
emptyDir:
|
||||||
|
medium: Memory
|
||||||
- name: pod-etc-armada
|
- name: pod-etc-armada
|
||||||
emptyDir: {}
|
emptyDir: {}
|
||||||
- name: armada-bin
|
- name: armada-bin
|
||||||
|
|
|
@ -216,6 +216,13 @@ conf:
|
||||||
# greater than that will have no effect.
|
# greater than that will have no effect.
|
||||||
prestop_sleep: 30
|
prestop_sleep: 30
|
||||||
|
|
||||||
|
monitoring:
|
||||||
|
prometheus:
|
||||||
|
armada:
|
||||||
|
scrape: true
|
||||||
|
path: /api/v1.0/metrics
|
||||||
|
port: 8000
|
||||||
|
|
||||||
pod:
|
pod:
|
||||||
security_context:
|
security_context:
|
||||||
armada:
|
armada:
|
||||||
|
|
|
@ -7,29 +7,29 @@ Commands
|
||||||
|
|
||||||
.. code:: bash
|
.. code:: bash
|
||||||
|
|
||||||
Usage: armada apply [OPTIONS] FILENAME
|
Usage: armada apply [OPTIONS] [LOCATIONS]...
|
||||||
|
|
||||||
This command installs and updates charts defined in armada manifest
|
This command installs and updates charts defined in Armada manifest.
|
||||||
|
|
||||||
The apply argument must be relative path to Armada Manifest. Executing
|
The apply argument must be relative path to Armada Manifest. Executing
|
||||||
apply command once will install all charts defined in manifest. Re-
|
apply command once will install all charts defined in manifest. Re-
|
||||||
executing apply command will execute upgrade.
|
executing apply command will execute upgrade.
|
||||||
|
|
||||||
To see how to create an Armada manifest:
|
To see how to create an Armada manifest: https://airship-
|
||||||
https://airship-armada.readthedocs.io/en/latest/operations/
|
armada.readthedocs.io/en/latest/operations/
|
||||||
|
|
||||||
To install or upgrade charts, run:
|
To install or upgrade charts, run:
|
||||||
|
|
||||||
$ armada apply examples/simple.yaml
|
$ armada apply examples/simple.yaml
|
||||||
|
|
||||||
To override a specific value in a Manifest, run:
|
To override a specific value in a Manifest, run:
|
||||||
|
|
||||||
$ armada apply examples/simple.yaml --set manifest:simple-armada:release="wordpress"
|
$ armada apply examples/simple.yaml --set manifest:simple-armada:release="wordpress"
|
||||||
|
|
||||||
Or to override several values in a Manifest, reference a values.yaml-
|
Or to override several values in a Manifest, reference a values.yaml-
|
||||||
formatted file:
|
formatted file:
|
||||||
|
|
||||||
$ armada apply examples/simple.yaml --values examples/simple-ovr-values.yaml
|
$ armada apply examples/simple.yaml --values examples/simple-ovr-values.yaml
|
||||||
|
|
||||||
Options:
|
Options:
|
||||||
--api Contacts service endpoint.
|
--api Contacts service endpoint.
|
||||||
|
@ -37,6 +37,8 @@ Commands
|
||||||
--disable-update-pre Disable pre-update Tiller operations.
|
--disable-update-pre Disable pre-update Tiller operations.
|
||||||
--dry-run Run charts without installing them.
|
--dry-run Run charts without installing them.
|
||||||
--enable-chart-cleanup Clean up unmanaged charts.
|
--enable-chart-cleanup Clean up unmanaged charts.
|
||||||
|
--metrics-output TEXT The output path for metric data
|
||||||
|
--use-doc-ref Use armada manifest file reference.
|
||||||
--set TEXT Use to override Armada Manifest values.
|
--set TEXT Use to override Armada Manifest values.
|
||||||
Accepts overrides that adhere to the format
|
Accepts overrides that adhere to the format
|
||||||
<path>:<to>:<property>=<value> to specify a
|
<path>:<to>:<property>=<value> to specify a
|
||||||
|
@ -46,15 +48,19 @@ Commands
|
||||||
--tiller-host TEXT Tiller host IP.
|
--tiller-host TEXT Tiller host IP.
|
||||||
--tiller-port INTEGER Tiller host port.
|
--tiller-port INTEGER Tiller host port.
|
||||||
-tn, --tiller-namespace TEXT Tiller namespace.
|
-tn, --tiller-namespace TEXT Tiller namespace.
|
||||||
--timeout INTEGER Specifies time to wait for charts to deploy.
|
--timeout INTEGER Specifies time to wait for each chart to fully
|
||||||
|
finish deploying.
|
||||||
-f, --values TEXT Use to override multiple Armada Manifest
|
-f, --values TEXT Use to override multiple Armada Manifest
|
||||||
values by reading overrides from a
|
values by reading overrides from a
|
||||||
values.yaml-type file.
|
values.yaml-type file.
|
||||||
--wait Wait until all charts deployed.
|
--wait Force Tiller to wait until all charts are
|
||||||
|
deployed, rather than using each charts
|
||||||
|
specified wait policy. This is equivalent to
|
||||||
|
sequenced chartgroups.
|
||||||
--target-manifest TEXT The target manifest to run. Required for
|
--target-manifest TEXT The target manifest to run. Required for
|
||||||
specifying which manifest to run when multiple
|
specifying which manifest to run when multiple
|
||||||
are available.
|
are available.
|
||||||
--bearer-token User bearer token.
|
--bearer-token TEXT User Bearer token
|
||||||
--debug Enable debug logging.
|
--debug Enable debug logging.
|
||||||
--help Show this message and exit.
|
--help Show this message and exit.
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ Operations Guide
|
||||||
guide-configure
|
guide-configure
|
||||||
guide-troubleshooting
|
guide-troubleshooting
|
||||||
guide-use-armada
|
guide-use-armada
|
||||||
|
metrics
|
||||||
exceptions/index
|
exceptions/index
|
||||||
guide-helm-plugin
|
guide-helm-plugin
|
||||||
sampleconf
|
sampleconf
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
.. _metrics:
|
||||||
|
|
||||||
|
Metrics
|
||||||
|
=======
|
||||||
|
|
||||||
|
Armada exposes metric data, for consumption by `Prometheus`_.
|
||||||
|
|
||||||
|
Exporting
|
||||||
|
---------
|
||||||
|
|
||||||
|
Metric data can be exported via:
|
||||||
|
|
||||||
|
* API: Prometheus exporter in the `/api/v1.0/metrics` endpoint. The Armada chart
|
||||||
|
includes the appropriate Prometheus scrape configurations for this endpoint.
|
||||||
|
* CLI: `--metrics-output=<path>` of `apply` command. The
|
||||||
|
`node exporter text file collector`_ can then be used to export the produced
|
||||||
|
text files to Prometheus.
|
||||||
|
|
||||||
|
Metric Names
|
||||||
|
------------
|
||||||
|
|
||||||
|
Metric names are as follows:
|
||||||
|
|
||||||
|
`armada_` + <action> + `_` + <metric>
|
||||||
|
|
||||||
|
Supported <action>s
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
The tree of <action>s below is measured. Supported Prometheus labels are noted.
|
||||||
|
Labels are inherited by sub-actions except as noted.
|
||||||
|
|
||||||
|
* `apply`:
|
||||||
|
|
||||||
|
* description: apply a manifest
|
||||||
|
* labels: `manifest`
|
||||||
|
* sub-actions:
|
||||||
|
|
||||||
|
* `chart_handle`:
|
||||||
|
|
||||||
|
* description: fully handle a chart (see below sub-actions)
|
||||||
|
* labels:
|
||||||
|
|
||||||
|
* `chart`
|
||||||
|
* `action` (install|upgrade|noop) (not included in sub-actions)
|
||||||
|
* sub-actions:
|
||||||
|
|
||||||
|
* `chart_download`
|
||||||
|
* `chart_deploy`
|
||||||
|
* `chart_test`
|
||||||
|
* `chart_delete`:
|
||||||
|
|
||||||
|
* description: delete a chart (e.g. due to `FAILED` status)
|
||||||
|
* labels: `chart`
|
||||||
|
|
||||||
|
Supported <metric>s
|
||||||
|
-------------------
|
||||||
|
|
||||||
|
* `failure_total`: total failed attempts
|
||||||
|
* `attempt_total`: total attempts
|
||||||
|
* `attempt_inprogress`: total attempts in progress
|
||||||
|
* `duration_seconds`: duration of each attempt
|
||||||
|
|
||||||
|
Timeouts
|
||||||
|
^^^^^^^^
|
||||||
|
|
||||||
|
The `chart_deploy` and `chart_test` actions additionally include the following
|
||||||
|
metrics:
|
||||||
|
|
||||||
|
* `timeout_duration_seconds`: configured chart timeout duration in seconds
|
||||||
|
* `timeout_usage_ratio`: `= duration_seconds / timeout_duration_seconds`
|
||||||
|
|
||||||
|
These can help identify charts whose timeouts may need to
|
||||||
|
be changed to avoid potential failures or to achieve faster failures.
|
||||||
|
|
||||||
|
Chart concurrency
|
||||||
|
^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The `chart_handle` action additionally includes the following metric:
|
||||||
|
|
||||||
|
* `concurrency_count`: count of charts being handled concurrently
|
||||||
|
|
||||||
|
This can help identify opportunities for greater chart concurrency.
|
||||||
|
|
||||||
|
.. _Prometheus: https://prometheus.io
|
||||||
|
.. _`node exporter text file collector`: https://github.com/prometheus/node_exporter#textfile-collector
|
|
@ -26,6 +26,8 @@ ARMADA_UWSGI_TIMEOUT=${ARMADA_UWSGI_TIMEOUT:-3600}
|
||||||
ARMADA_UWSGI_WORKERS=${ARMADA_UWSGI_WORKERS:-4}
|
ARMADA_UWSGI_WORKERS=${ARMADA_UWSGI_WORKERS:-4}
|
||||||
# Threads per worker
|
# Threads per worker
|
||||||
ARMADA_UWSGI_THREADS=${ARMADA_UWSGI_THREADS:-1}
|
ARMADA_UWSGI_THREADS=${ARMADA_UWSGI_THREADS:-1}
|
||||||
|
# Prometheus multiprocess dir
|
||||||
|
ARMADA_PROMETHEUS_MULTIPROC_DIR=${ARMADA_PROMETHEUS_MULTIPROC_DIR:-$(mktemp -d -p /tmp/armada/metrics XXXXXX)}
|
||||||
|
|
||||||
# Start Armada application
|
# Start Armada application
|
||||||
# TODO(fmontei): Should be specifying callable too. But Armada spins up the
|
# TODO(fmontei): Should be specifying callable too. But Armada spins up the
|
||||||
|
@ -37,6 +39,7 @@ if [ "$1" = 'server' ]; then
|
||||||
--http :"${ARMADA_UWSGI_PORT}" \
|
--http :"${ARMADA_UWSGI_PORT}" \
|
||||||
--http-timeout "$ARMADA_UWSGI_TIMEOUT" \
|
--http-timeout "$ARMADA_UWSGI_TIMEOUT" \
|
||||||
--enable-threads \
|
--enable-threads \
|
||||||
|
--env prometheus_multiproc_dir="$ARMADA_PROMETHEUS_MULTIPROC_DIR" \
|
||||||
-L \
|
-L \
|
||||||
--lazy-apps \
|
--lazy-apps \
|
||||||
--master \
|
--master \
|
||||||
|
|
|
@ -10,6 +10,7 @@ PasteDeploy>=1.5.2
|
||||||
protobuf>=3.4.0
|
protobuf>=3.4.0
|
||||||
PyYAML==3.12
|
PyYAML==3.12
|
||||||
requests
|
requests
|
||||||
|
prometheus_client==0.7.0
|
||||||
|
|
||||||
# API
|
# API
|
||||||
falcon
|
falcon
|
||||||
|
|
Loading…
Reference in New Issue