From 1fd8bb9812df91c80385681cffbea8be3a6ceac9 Mon Sep 17 00:00:00 2001 From: Aaron Sheffield Date: Tue, 13 Mar 2018 12:15:30 -0500 Subject: [PATCH] Migrating Health Check to UCP Standards - Added a second health check endpoint /extended to get additional data. - Conforms health check response with UCP standards. Change-Id: I9e3ac27ec7e536bb18201f1a4642490725a8062c --- charts/drydock/templates/etc/_policy.yaml.tpl | 4 + drydock_provisioner/control/api.py | 5 +- drydock_provisioner/control/health.py | 92 ++++++++++++++----- drydock_provisioner/objects/healthcheck.py | 74 +++++++++++++++ drydock_provisioner/policy.py | 9 +- etc/drydock/policy.yaml.sample | 4 + 6 files changed, 161 insertions(+), 27 deletions(-) create mode 100644 drydock_provisioner/objects/healthcheck.py diff --git a/charts/drydock/templates/etc/_policy.yaml.tpl b/charts/drydock/templates/etc/_policy.yaml.tpl index fb194106..2a5e26a2 100755 --- a/charts/drydock/templates/etc/_policy.yaml.tpl +++ b/charts/drydock/templates/etc/_policy.yaml.tpl @@ -44,3 +44,7 @@ # POST /api/v1.0/designs/{design_id}/parts #"physical_provisioner:ingest_data": "role:admin" +# Get health status +# GET /api/v1.0/health/extended +#"physical_provisioner:health_data": "role:admin" + diff --git a/drydock_provisioner/control/api.py b/drydock_provisioner/control/api.py index f410fd58..62f5abd0 100644 --- a/drydock_provisioner/control/api.py +++ b/drydock_provisioner/control/api.py @@ -1,4 +1,4 @@ -# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -23,6 +23,7 @@ from .tasks import TaskResource from .nodes import NodesResource from .nodes import NodeBuildDataResource from .health import HealthResource +from .health import HealthExtendedResource from .bootaction import BootactionUnitsResource from .bootaction import BootactionFilesResource from .bootaction import BootactionResource @@ -57,6 +58,8 @@ def start_api(state_manager=None, ingester=None, orchestrator=None): # API for managing orchestrator tasks ('/health', HealthResource(state_manager=state_manager, orchestrator=orchestrator)), + ('/health/extended', HealthExtendedResource(state_manager=state_manager, + orchestrator=orchestrator)), ('/tasks', TasksResource(state_manager=state_manager, orchestrator=orchestrator)), diff --git a/drydock_provisioner/control/health.py b/drydock_provisioner/control/health.py index d81715c7..01ef9cf8 100644 --- a/drydock_provisioner/control/health.py +++ b/drydock_provisioner/control/health.py @@ -1,4 +1,4 @@ -# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,44 +14,84 @@ import falcon import json -from drydock_provisioner.control.base import BaseResource +from drydock_provisioner.control.base import StatefulResource from drydock_provisioner.drivers.node.maasdriver.actions.node import ValidateNodeServices +from drydock_provisioner.objects.healthcheck import HealthCheck +from drydock_provisioner.objects.healthcheck import HealthCheckMessage from drydock_provisioner.objects.fields import ActionResult import drydock_provisioner.objects.fields as hd_fields +import drydock_provisioner.policy as policy -class HealthResource(BaseResource): +class HealthResource(StatefulResource): """ - Return empty response/body to show - that Drydock is healthy + Health endpoint that reports Drydock status by HTTP code only, with no body """ - def __init__(self, state_manager=None, orchestrator=None, **kwargs): + def __init__(self, orchestrator=None, **kwargs): """Object initializer. - :param state_manager: instance of Drydock state_manager + :param orchestrator: instance of Drydock orchestrator """ super().__init__(**kwargs) - self.state_manager = state_manager self.orchestrator = orchestrator def on_get(self, req, resp): """ - Returns 204 on success, otherwise 500 with a response body. + Sets 204 when healthy, otherwise 503; no response body is written. """ - healthy = True + hc = HealthCheckCombined(state_manager=self.state_manager, + orchestrator=self.orchestrator, + extended=False) + return hc.get(req, resp) + +class HealthExtendedResource(StatefulResource): + """ + Health endpoint that also reports Drydock status details in the response body + """ + def __init__(self, orchestrator=None, **kwargs): + """Object initializer. + + :param orchestrator: instance of Drydock orchestrator + """ + super().__init__(**kwargs) + self.orchestrator = orchestrator + + @policy.ApiEnforcer('physical_provisioner:health_data') + def on_get(self, req, resp): + """ + Sets 200 when healthy, otherwise 503; the response body is always written. 
+ """ + hc = HealthCheckCombined(state_manager=self.state_manager, + orchestrator=self.orchestrator, + extended=True) + return hc.get(req, resp) + +class HealthCheckCombined(object): + """ + Runs the database and MaaS checks shared by both health endpoints. + """ + def __init__(self, state_manager=None, orchestrator=None, extended=False): + """Object initializer. + + :param orchestrator: instance of Drydock orchestrator + """ + self.state_manager = state_manager + self.orchestrator = orchestrator + self.extended = extended + + def get(self, req, resp): + """ + Sets resp.status, and resp.body when extended is True; returns None. + """ + health_check = HealthCheck() # Test database connection try: now = self.state_manager.get_now() if now is None: raise Exception('None received from database for now()') except Exception as ex: - healthy = False - resp.body = json.dumps({ - 'type': 'error', - 'message': 'Database error', - 'retry': True - }) - resp.status = falcon.HTTP_500 + hcm = HealthCheckMessage(msg='Unable to connect to database', error=True) + health_check.add_detail_msg(msg=hcm) # Test MaaS connection try: @@ -61,13 +101,15 @@ class HealthResource(BaseResource): if maas_validation.task.get_status() == ActionResult.Failure: raise Exception('MaaS task failure') except Exception as ex: - healthy = False - resp.body = json.dumps({ - 'type': 'error', - 'message': 'MaaS error', - 'retry': True - }) - resp.status = falcon.HTTP_500 + hcm = HealthCheckMessage(msg='Unable to connect to MaaS', error=True) + health_check.add_detail_msg(msg=hcm) - if healthy: + if self.extended: + resp.body = json.dumps(health_check.to_dict()) + + if health_check.is_healthy() and self.extended: + resp.status = falcon.HTTP_200 + elif health_check.is_healthy(): resp.status = falcon.HTTP_204 + else: + resp.status = falcon.HTTP_503 diff --git a/drydock_provisioner/objects/healthcheck.py b/drydock_provisioner/objects/healthcheck.py new file mode 100644 index 00000000..d6d4ab24 --- /dev/null +++ b/drydock_provisioner/objects/healthcheck.py @@ 
-0,0 +1,74 @@ +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Models for representing health check status.""" + +class HealthCheck(object): + """Aggregated health check result, rendered by to_dict() as a Status document.""" + + def __init__(self): + self.error_count = 0 + self.code = 200 + self.message = '' + self.status = 'Success' + self.message_list = [] + + def add_detail_msg(self, msg=None): + """Add a detailed health check message. + + :param msg: instance of HealthCheckMessage + """ + self.message_list.append(msg) + + if msg.error or getattr(msg, 'level', None) == "Error": + self.error_count = self.error_count + 1 + self.code = 503 + self.message = 'DryDock failed to respond' + self.status = 'Failure' + + def to_dict(self): + return { + 'kind': 'Status', + 'apiVersion': 'v1.0', + 'metadata': {}, + 'status': self.status, + 'message': self.message, + 'reason': 'HealthCheck', + 'details': { + 'errorCount': self.error_count, + 'messageList': [x.to_dict() for x in self.message_list], + }, + 'code': self.code + } + + def is_healthy(self): + if self.error_count == 0: + return True + return False + + +class HealthCheckMessage(object): + """Message describing details of a health check.""" + + def __init__(self, msg, error=False): + self.message = msg + self.error = error + + def to_dict(self): + """Convert to a dictionary in prep for JSON/YAML serialization.""" + _dict = { + 'message': self.message, + 'error': self.error, + 'kind': 
'SimpleMessage', + } + return _dict diff --git a/drydock_provisioner/policy.py b/drydock_provisioner/policy.py index 9fefd182..970111a2 100644 --- a/drydock_provisioner/policy.py +++ b/drydock_provisioner/policy.py @@ -1,4 +1,4 @@ -# Copyright 2017 AT&T Intellectual Property. All other rights reserved. +# Copyright 2018 AT&T Intellectual Property. All other rights reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -125,6 +125,13 @@ class DrydockPolicy(object): }, { 'path': '/api/v1.0/designs/{design_id}/parts', 'method': 'POST' + }]), + policy.DocumentedRuleDefault( + 'physical_provisioner:health_data', 'role:admin', + 'Get health status', + [{ + 'path': '/api/v1.0/health/extended', + 'method': 'GET' }]) ] diff --git a/etc/drydock/policy.yaml.sample b/etc/drydock/policy.yaml.sample index 0812ca43..33dd92a3 100644 --- a/etc/drydock/policy.yaml.sample +++ b/etc/drydock/policy.yaml.sample @@ -52,3 +52,7 @@ # POST /api/v1.0/validatedesign #"physical_provisioner:validate_site_design": "role:admin" +# Get health status +# GET /api/v1.0/health/extended +#"physical_provisioner:health_data": "role:admin" +