From a171f3c7a5f3b60e7a14c92c1d49e12642f1a015 Mon Sep 17 00:00:00 2001 From: "SPEARS, DUSTIN (ds443n)" Date: Mon, 4 Apr 2022 10:55:33 -0400 Subject: [PATCH] Add postgresql retention cronjob Adding cronjob to purge the drydock DB based on retention day value. Additionally adding drydock API endpoint for purging the tasks and result_message tables and running vacuum full on drydock DB. Change-Id: Ibcce61ecdafa637ca3ffec654152060aae26d4b8 --- charts/drydock/Chart.yaml | 2 +- .../templates/bin/_db-pg-purge-table.sh.tpl | 43 ++++++++++ charts/drydock/templates/configmap-bin.yaml | 2 + .../templates/cronjob-drydock-db-cleanup.yaml | 71 ++++++++++++++++ charts/drydock/templates/etc/_policy.yaml.tpl | 4 + charts/drydock/values.yaml | 24 ++++++ doc/source/_static/policy.yaml.sample | 4 + etc/drydock/policy.yaml.sample | 4 + .../drydock_provisioner/cli/task/actions.py | 14 ++++ .../drydock_provisioner/cli/task/commands.py | 13 +++ python/drydock_provisioner/control/tasks.py | 22 +++++ .../drydock_client/client.py | 16 ++++ .../drydock_client/session.py | 28 +++++++ python/drydock_provisioner/policy.py | 6 ++ python/drydock_provisioner/statemgmt/state.py | 80 +++++++++++++++++++ 15 files changed, 332 insertions(+), 1 deletion(-) create mode 100644 charts/drydock/templates/bin/_db-pg-purge-table.sh.tpl create mode 100644 charts/drydock/templates/cronjob-drydock-db-cleanup.yaml diff --git a/charts/drydock/Chart.yaml b/charts/drydock/Chart.yaml index 99a0672b..5b30a576 100644 --- a/charts/drydock/Chart.yaml +++ b/charts/drydock/Chart.yaml @@ -15,7 +15,7 @@ apiVersion: v1 description: A Helm chart for Drydock name: drydock -version: 0.1.0 +version: 0.1.1 keywords: - drydock home: https://github.com/openstack/airship-drydock diff --git a/charts/drydock/templates/bin/_db-pg-purge-table.sh.tpl b/charts/drydock/templates/bin/_db-pg-purge-table.sh.tpl new file mode 100644 index 00000000..279d91ca --- /dev/null +++ b/charts/drydock/templates/bin/_db-pg-purge-table.sh.tpl @@ -0,0 +1,43 
@@ +#!/bin/bash + +{{/* +Copyright (c) 2017 AT&T Intellectual Property. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +set -ex + +log () { + msg_default="Need some text to log" + level_default="INFO" + component_default="Drydock Retention Cronjob" + + msg=${1:-$msg_default} + level=${2:-$level_default} + component=${3:-"$component_default"} + + echo "$(date +'%Y-%m-%d %H:%M:%S,%3N') - ${component} - ${level} - ${msg}" +} + +delete_tasks () { + drydock task delete --days {{ .Values.endpoints.postgresql.days_to_retain }} +} + +if delete_tasks ; then + log "DB table purge completed successfully" + exit 0 +else + log "Failed to purge tables!" "ERROR" + exit 1 +fi diff --git a/charts/drydock/templates/configmap-bin.yaml b/charts/drydock/templates/configmap-bin.yaml index c6b1f746..6574254c 100755 --- a/charts/drydock/templates/configmap-bin.yaml +++ b/charts/drydock/templates/configmap-bin.yaml @@ -32,5 +32,7 @@ data: {{- include "helm-toolkit.scripts.pg_db_init" . | indent 4 }} db-sync.sh: |+ {{ tuple "bin/_db-sync.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} + db-pg-purge-table.sh: |+ +{{ tuple "bin/_db-pg-purge-table.sh.tpl" . | include "helm-toolkit.utils.template" | indent 4 }} ... 
{{- end -}} diff --git a/charts/drydock/templates/cronjob-drydock-db-cleanup.yaml b/charts/drydock/templates/cronjob-drydock-db-cleanup.yaml new file mode 100644 index 00000000..af45a1ad --- /dev/null +++ b/charts/drydock/templates/cronjob-drydock-db-cleanup.yaml @@ -0,0 +1,71 @@ +{{/* +Copyright (c) 2017 AT&T Intellectual Property. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/}} + +{{- if .Values.manifests.cronjob_drydock_db_cleanup }} +{{- $envAll := . }} +{{- $serviceAccountName := "drydock-db-cleanup" }} +{{ tuple $envAll "db_cleanup" $serviceAccountName | include "helm-toolkit.snippets.kubernetes_pod_rbac_serviceaccount" }} +--- +apiVersion: batch/v1 +kind: CronJob +metadata: + name: drydock-db-cleanup + labels: +{{ tuple $envAll "drydock" "db-cleanup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 4 }} + annotations: + {{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" }} +spec: + schedule: {{ .Values.endpoints.postgresql.cleanup_schedule | quote }} + jobTemplate: + spec: + template: + metadata: + labels: +{{ tuple $envAll "drydock" "db-cleanup" | include "helm-toolkit.snippets.kubernetes_metadata_labels" | indent 12 }} + annotations: +{{ tuple $envAll | include "helm-toolkit.snippets.release_uuid" | indent 12 }} +{{ dict "envAll" $envAll "podName" "drydock-db-cleanup" "containerNames" (list "drydock-db-cleanup") | include "helm-toolkit.snippets.kubernetes_mandatory_access_control_annotation" | indent 12 }} + 
configmap-bin-hash: {{ tuple "configmap-bin.yaml" . | include "helm-toolkit.utils.hash" }} + spec: +{{ dict "envAll" $envAll "application" "db_cleanup" | include "helm-toolkit.snippets.kubernetes_pod_security_context" | indent 10 }} + serviceAccountName: {{ $serviceAccountName }} + restartPolicy: OnFailure + nodeSelector: + {{ .Values.labels.job.node_selector_key }}: {{ .Values.labels.job.node_selector_value }} + containers: + - name: drydock-db-cleanup + image: {{ .Values.images.tags.drydock_db_cleanup | quote }} + imagePullPolicy: {{ .Values.images.pull_policy | quote }} +{{ tuple $envAll $envAll.Values.pod.resources.cronjobs.drydock_db_cleanup | include "helm-toolkit.snippets.kubernetes_resources" | indent 12 }} +{{ dict "envAll" $envAll "application" "db_cleanup" "container" "drydock_db_cleanup" | include "helm-toolkit.snippets.kubernetes_container_security_context" | indent 14 }} + envFrom: + - secretRef: + name: {{ .Values.secrets.identity.drydock }} + command: + - /tmp/db-pg-purge-table.sh + volumeMounts: + - name: drydock-bin + mountPath: /tmp/db-pg-purge-table.sh + subPath: db-pg-purge-table.sh + readOnly: true + volumes: + - name: drydock-bin + configMap: + name: drydock-bin + defaultMode: 0555 +... 
+{{- end }} + diff --git a/charts/drydock/templates/etc/_policy.yaml.tpl b/charts/drydock/templates/etc/_policy.yaml.tpl index 2a5e26a2..e472f553 100755 --- a/charts/drydock/templates/etc/_policy.yaml.tpl +++ b/charts/drydock/templates/etc/_policy.yaml.tpl @@ -34,6 +34,10 @@ # POST /api/v1.0/tasks #"physical_provisioner:destroy_node": "role:admin" +# Deletes tasks by age +# DELETE /api/v1.0/tasks +#"physical_provisioner:delete_tasks": "role:admin" + # Read loaded design data # GET /api/v1.0/designs # GET /api/v1.0/designs/{design_id} diff --git a/charts/drydock/values.yaml b/charts/drydock/values.yaml index eb1375e8..a2145ef7 100644 --- a/charts/drydock/values.yaml +++ b/charts/drydock/values.yaml @@ -36,6 +36,7 @@ images: ks_service: docker.io/openstackhelm/heat:newton ks_endpoints: docker.io/openstackhelm/heat:newton drydock_db_init: docker.io/postgres:9.5 + drydock_db_cleanup: quay.io/airshipit/drydock:master drydock_db_sync: quay.io/airshipit/drydock:master pull_policy: "IfNotPresent" #TODO(mattmceuen): This chart does not yet support local image caching @@ -66,6 +67,9 @@ pod: drydock-db-init: init: runtime/default drydock-db-init: runtime/default + drydock-db-cleanup: + init: runtime/default + drydock-db-cleanup: runtime/default drydock-db-sync: init: runtime/default drydock-db-sync: runtime/default @@ -88,6 +92,13 @@ pod: drydock_db_init: readOnlyRootFilesystem: true allowPrivilegeEscalation: false + db_cleanup: + pod: + runAsUser: 65534 + container: + drydock_db_cleanup: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false db_sync: pod: runAsUser: 65534 @@ -175,6 +186,14 @@ pod: requests: memory: "128Mi" cpu: "100m" + cronjobs: + drydock_db_cleanup: + limits: + memory: "128Mi" + cpu: "100m" + requests: + memory: "128Mi" + cpu: "100m" network_policy: drydock: @@ -184,6 +203,7 @@ network_policy: - {} manifests: + cronjob_drydock_db_cleanup: false job_ks_service: true job_ks_user: true job_ks_endpoints: true @@ -304,6 +324,10 @@ endpoints: # ca: 
replace # key: replace postgresql: + # frequency to run table purges + cleanup_schedule: "0 1 * * *" + # number of days to retain for the tasks table + days_to_retain: "90" name: postgresql auth: admin: diff --git a/doc/source/_static/policy.yaml.sample b/doc/source/_static/policy.yaml.sample index 22b23659..54f02d18 100644 --- a/doc/source/_static/policy.yaml.sample +++ b/doc/source/_static/policy.yaml.sample @@ -38,6 +38,10 @@ # POST /api/v1.0/tasks #"physical_provisioner:destroy_nodes": "role:admin" +# Deletes tasks by age +# DELETE /api/v1.0/tasks +#"physical_provisioner:delete_tasks": "role:admin" + # Create relabel_nodes task # POST /api/v1.0/tasks #"physical_provisioner:relabel_nodes": "role:admin" diff --git a/etc/drydock/policy.yaml.sample b/etc/drydock/policy.yaml.sample index 22b23659..54f02d18 100644 --- a/etc/drydock/policy.yaml.sample +++ b/etc/drydock/policy.yaml.sample @@ -38,6 +38,10 @@ # POST /api/v1.0/tasks #"physical_provisioner:destroy_nodes": "role:admin" +# Deletes tasks by age +# DELETE /api/v1.0/tasks +#"physical_provisioner:delete_tasks": "role:admin" + # Create relabel_nodes task # POST /api/v1.0/tasks #"physical_provisioner:relabel_nodes": "role:admin" diff --git a/python/drydock_provisioner/cli/task/actions.py b/python/drydock_provisioner/cli/task/actions.py index 18da7ea6..a1ad6dec 100644 --- a/python/drydock_provisioner/cli/task/actions.py +++ b/python/drydock_provisioner/cli/task/actions.py @@ -156,3 +156,17 @@ class TaskBuildData(CliAction): def invoke(self): return self.api_client.get_task_build_data(self.task_id) + +class TasksDelete(CliAction): + """Action to delete tasks in database.""" + + def __init__(self, api_client, days): + """ + :param DrydockClient api_client: the api client instance used for invocation. 
+ :param str days: Number of days of tasks to keep, based on the created timestamp + """ + super().__init__(api_client) + self.days = days + + def invoke(self): + return self.api_client.delete_tasks(days=self.days) diff --git a/python/drydock_provisioner/cli/task/commands.py b/python/drydock_provisioner/cli/task/commands.py index d7f207a1..a97bc629 100644 --- a/python/drydock_provisioner/cli/task/commands.py +++ b/python/drydock_provisioner/cli/task/commands.py @@ -20,6 +20,7 @@ from drydock_provisioner.cli.task.actions import TaskList from drydock_provisioner.cli.task.actions import TaskShow from drydock_provisioner.cli.task.actions import TaskCreate from drydock_provisioner.cli.task.actions import TaskBuildData +from drydock_provisioner.cli.task.actions import TasksDelete @click.group() @@ -130,3 +131,15 @@ def task_builddata(ctx, task_id=None, output='yaml'): click.echo( yaml.safe_dump( task_bd, allow_unicode=True, default_flow_style=False)) + + +@task.command(name='delete') +@click.option('--days', '-d', help='The required number of days to retain tasks') +@click.pass_context +def task_delete(ctx, days=None): + """Delete tasks from database""" + if not days: + ctx.fail('The number of days must be specified using --days or -d') + + click.echo( + TasksDelete(ctx.obj['CLIENT'], days=days).invoke()) diff --git a/python/drydock_provisioner/control/tasks.py b/python/drydock_provisioner/control/tasks.py index 92eaf305..98def5b4 100644 --- a/python/drydock_provisioner/control/tasks.py +++ b/python/drydock_provisioner/control/tasks.py @@ -86,6 +86,28 @@ class TasksResource(StatefulResource): self.return_error( resp, falcon.HTTP_500, message="Unknown error", retry=False) + @policy.ApiEnforcer('physical_provisioner:delete_tasks') + def on_delete(self, req, resp): + """Handler resource for /tasks delete endpoint.""" + try: + days_to_retain = int(req.params["days"]) + except Exception: + days_to_retain = 90 + + try: + retention_status = self.state_manager.task_retention( +
retain_days=str(days_to_retain)) + if not retention_status: + resp.status = falcon.HTTP_404 + return + resp.body = "Tables purged successfully." + except Exception as e: + self.error(req.context, "Unknown error: %s" % (str(e))) + resp.body = "Unexpected error." + resp.status = falcon.HTTP_500 + return + resp.status = falcon.HTTP_200 + @policy.ApiEnforcer('physical_provisioner:validate_design') def task_validate_design(self, req, resp, json_data): """Create async task for validate design.""" diff --git a/python/drydock_provisioner/drydock_client/client.py b/python/drydock_provisioner/drydock_client/client.py index dde7a8dd..dec4a3d5 100644 --- a/python/drydock_provisioner/drydock_client/client.py +++ b/python/drydock_provisioner/drydock_client/client.py @@ -129,6 +129,22 @@ class DrydockClient(object): return resp.json() + def delete_tasks(self, days=None): + """ + Enforce retention policy. + + :param int days: default to 90 days retention of tasks table. + """ + + endpoint = 'v1.0/tasks' + endpoint = endpoint + '?days=' + str(days) + + resp = self.session.delete(endpoint) + + self._check_response(resp) + + return "Task table purged successfully." + def create_task(self, design_ref, task_action, node_filter=None): """ Create a new task in Drydock diff --git a/python/drydock_provisioner/drydock_client/session.py b/python/drydock_provisioner/drydock_client/session.py index b59720f9..f236e356 100644 --- a/python/drydock_provisioner/drydock_client/session.py +++ b/python/drydock_provisioner/drydock_client/session.py @@ -144,6 +144,34 @@ class DrydockSession(object): return resp + def delete(self, endpoint, query=None, timeout=None): + """ + Send a DELETE request to Drydock. If both body and data are specified, + body will be used. + + :param string endpoint: The URL string following the hostname and API prefix + :param dict query: A dict of k, v pairs to add to the query string + :param timeout: A single or tuple value for connect, read timeout.
+ A single value indicates the read timeout only + :return: A requests.Response object + """ + + auth_refresh = False + while True: + url = self.base_url + endpoint + self.logger.debug('DELETE ' + url) + self.logger.debug('Query Params: ' + str(query)) + resp = self.__session.delete( + url, params=query, timeout=self._timeout(timeout)) + + if resp.status_code == 401 and not auth_refresh: + self.set_auth() + auth_refresh = True + else: + break + + return resp + def _timeout(self, timeout=None): """Calculate the default timeouts for this session diff --git a/python/drydock_provisioner/policy.py b/python/drydock_provisioner/policy.py index 8fa1fccc..dfeaf7ae 100644 --- a/python/drydock_provisioner/policy.py +++ b/python/drydock_provisioner/policy.py @@ -95,6 +95,12 @@ class DrydockPolicy(object): 'path': '/api/v1.0/tasks', 'method': 'POST' }]), + policy.DocumentedRuleDefault('physical_provisioner:delete_tasks', + 'role:admin', 'Deletes tasks by age', + [{ + 'path': '/api/v1.0/tasks', + 'method': 'DELETE' + }]), policy.DocumentedRuleDefault('physical_provisioner:relabel_nodes', 'role:admin', 'Create relabel_nodes task', [{ diff --git a/python/drydock_provisioner/statemgmt/state.py b/python/drydock_provisioner/statemgmt/state.py index 060ce6f1..277f85f6 100644 --- a/python/drydock_provisioner/statemgmt/state.py +++ b/python/drydock_provisioner/statemgmt/state.py @@ -241,6 +241,23 @@ class DrydockState(object): % (str(task_id), str(ex))) return False + def delete_result_message(self, task_id, msg): + """Delete a result message attached to task task_id from the database.
+ + :param task_id: uuid.UUID ID of the task the msg belongs to + :param msg: instance of objects.TaskStatusMessage + """ + try: + with self.db_engine.connect() as conn: + query = self.result_message_tbl.delete().values( + task_id=task_id.bytes, **(msg.to_db())) + conn.execute(query) + return True + except Exception as ex: + self.logger.error("Error delete result message for task %s: %s" + % (str(task_id), str(ex))) + return False + def _assemble_tasks(self, task_list=None): """Attach all the appropriate result messages to the tasks in the list. @@ -304,6 +321,69 @@ class DrydockState(object): "Error updating task %s: %s" % (str(task.task_id), str(ex))) return False + def task_retention(self, retain_days): + """Delete all tasks in the database older than x days. + + :param retain_days: number of days to keep tasks + """ + with self.db_engine.connect() as conn: + try: + query_tasks_text = sql.text( + "DELETE FROM tasks WHERE created < now() - interval '" + + retain_days + + " days'").execution_options(autocommit=True) + conn.execute(query_tasks_text) + conn.close() + except Exception as ex: + self.logger.error( + "Error deleting tasks: %s" % str(ex)) + return False + + with self.db_engine.connect() as conn: + try: + query_subtasks_text = ( + "DELETE FROM tasks " + "WHERE parent_task_id IS NOT NULL AND " + "parent_task_id NOT IN " + "(SELECT task_id FROM tasks);") + conn.execute(sql.text(query_subtasks_text)) + conn.close() + except Exception as ex: + self.logger.error( + "Error deleting subtasks: %s" % str(ex)) + return
False + + with self.db_engine.connect() as conn: + try: + real_conn = conn.connection + old_isolation_level = real_conn.isolation_level + real_conn.set_isolation_level(0) + query_vacuum_text = sql.text("VACUUM FULL") + conn.execute(query_vacuum_text) + real_conn.set_isolation_level(old_isolation_level) + conn.close() + except Exception as ex: + self.logger.error( + "Error running vacuum full: %s" % str(ex)) + return False + + return True + def add_subtask(self, task_id, subtask_id): """Add new task to subtask list.