Update DryDock Operator
The following errors [0] were encountered during our end-to-end testing. They are the result of extended execution of the workflow, which led to expiration of the keystone token. It is also possible for the 'prepare_site' task to take more than 120 seconds to complete. Hence we are increasing the timeout set by the 'prepare_site_task_timeout' variable to 300 seconds. This patch set addresses the above 2 observations. [0] Logs from DryDock: Authorization failed for token Identity response: {"error": {"message": "Failed to validate token", "code": 404, "title": "Not Found"}} Authorization failed for token Change-Id: I4760e390822e6e8c9540216035e263d054fde400
This commit is contained in:
parent
609bc0a624
commit
5190189a60
|
@ -323,7 +323,7 @@ conf:
|
||||||
verify_site_query_interval: 10
|
verify_site_query_interval: 10
|
||||||
verify_site_task_timeout: 60
|
verify_site_task_timeout: 60
|
||||||
prepare_site_query_interval: 10
|
prepare_site_query_interval: 10
|
||||||
prepare_site_task_timeout: 120
|
prepare_site_task_timeout: 300
|
||||||
prepare_node_query_interval: 30
|
prepare_node_query_interval: 30
|
||||||
prepare_node_task_timeout: 1800
|
prepare_node_task_timeout: 1800
|
||||||
deploy_node_query_interval: 30
|
deploy_node_query_interval: 30
|
||||||
|
|
|
@ -68,7 +68,7 @@
|
||||||
#prepare_site_query_interval = 10
|
#prepare_site_query_interval = 10
|
||||||
|
|
||||||
# Time out (in seconds) for prepare_site task (integer value)
|
# Time out (in seconds) for prepare_site task (integer value)
|
||||||
#prepare_site_task_timeout = 120
|
#prepare_site_task_timeout = 300
|
||||||
|
|
||||||
# Query interval (in seconds) for prepare_node task (integer value)
|
# Query interval (in seconds) for prepare_node task (integer value)
|
||||||
#prepare_node_query_interval = 30
|
#prepare_node_query_interval = 30
|
||||||
|
|
|
@ -147,7 +147,7 @@ SECTIONS = [
|
||||||
),
|
),
|
||||||
cfg.IntOpt(
|
cfg.IntOpt(
|
||||||
'prepare_site_task_timeout',
|
'prepare_site_task_timeout',
|
||||||
default=120,
|
default=300,
|
||||||
help='Time out (in seconds) for prepare_site task'
|
help='Time out (in seconds) for prepare_site task'
|
||||||
),
|
),
|
||||||
cfg.IntOpt(
|
cfg.IntOpt(
|
||||||
|
|
|
@ -28,6 +28,7 @@ from airflow.utils.decorators import apply_defaults
|
||||||
import drydock_provisioner.drydock_client.client as client
|
import drydock_provisioner.drydock_client.client as client
|
||||||
import drydock_provisioner.drydock_client.session as session
|
import drydock_provisioner.drydock_client.session as session
|
||||||
from check_k8s_node_status import check_node_status
|
from check_k8s_node_status import check_node_status
|
||||||
|
from drydock_provisioner import error as errors
|
||||||
from service_endpoint import ucp_service_endpoint
|
from service_endpoint import ucp_service_endpoint
|
||||||
from service_token import shipyard_service_token
|
from service_token import shipyard_service_token
|
||||||
|
|
||||||
|
@ -152,7 +153,7 @@ class DryDockOperator(BaseOperator):
|
||||||
# Create Task for prepare_site
|
# Create Task for prepare_site
|
||||||
elif self.action == 'prepare_site':
|
elif self.action == 'prepare_site':
|
||||||
# Default settings for 'prepare_site' execution is to query
|
# Default settings for 'prepare_site' execution is to query
|
||||||
# the task every 10 seconds and to time out after 120 seconds
|
# the task every 10 seconds and to time out after 300 seconds
|
||||||
query_interval = config.get('drydock',
|
query_interval = config.get('drydock',
|
||||||
'prepare_site_query_interval')
|
'prepare_site_query_interval')
|
||||||
task_timeout = config.get('drydock', 'prepare_site_task_timeout')
|
task_timeout = config.get('drydock', 'prepare_site_task_timeout')
|
||||||
|
@ -249,8 +250,8 @@ class DryDockOperator(BaseOperator):
|
||||||
logging.info('Task ID is %s', task_id)
|
logging.info('Task ID is %s', task_id)
|
||||||
|
|
||||||
# Query Task
|
# Query Task
|
||||||
self.drydock_query_task(drydock_client, interval, time_out,
|
self.drydock_query_task(drydock_client, context, interval,
|
||||||
task_id)
|
time_out, task_id)
|
||||||
|
|
||||||
def drydock_perform_task(self, drydock_client, context,
|
def drydock_perform_task(self, drydock_client, context,
|
||||||
perform_task, nodes_filter):
|
perform_task, nodes_filter):
|
||||||
|
@ -279,7 +280,13 @@ class DryDockOperator(BaseOperator):
|
||||||
else:
|
else:
|
||||||
raise AirflowException("Unable to create task!")
|
raise AirflowException("Unable to create task!")
|
||||||
|
|
||||||
def drydock_query_task(self, drydock_client, interval, time_out, task_id):
|
def drydock_query_task(self, drydock_client, context, interval,
|
||||||
|
time_out, task_id):
|
||||||
|
|
||||||
|
# Initialize Variables
|
||||||
|
keystone_token_expired = False
|
||||||
|
new_dd_client = None
|
||||||
|
dd_client = drydock_client
|
||||||
|
|
||||||
# Calculate number of times to execute the 'for' loop
|
# Calculate number of times to execute the 'for' loop
|
||||||
# Convert 'time_out' and 'interval' from string into integer
|
# Convert 'time_out' and 'interval' from string into integer
|
||||||
|
@ -290,15 +297,49 @@ class DryDockOperator(BaseOperator):
|
||||||
# Query task status
|
# Query task status
|
||||||
for i in range(0, end_range + 1):
|
for i in range(0, end_range + 1):
|
||||||
|
|
||||||
|
if keystone_token_expired:
|
||||||
|
logging.info("Established new drydock session")
|
||||||
|
dd_client = new_dd_client
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Retrieve current task state
|
# Retrieve current task state
|
||||||
task_state = drydock_client.get_task(task_id=task_id)
|
task_state = dd_client.get_task(task_id=task_id)
|
||||||
task_status = task_state.get('status')
|
task_status = task_state.get('status')
|
||||||
task_result = task_state.get('result')['status']
|
task_result = task_state.get('result')['status']
|
||||||
|
|
||||||
logging.info("Current status of task id %s is %s",
|
logging.info("Current status of task id %s is %s",
|
||||||
task_id, task_status)
|
task_id, task_status)
|
||||||
|
|
||||||
|
keystone_token_expired = False
|
||||||
|
|
||||||
|
except errors.ClientUnauthorizedError as unauthorized_error:
|
||||||
|
|
||||||
|
# TODO: This is a temporary workaround. Drydock will be
|
||||||
|
# updated with the appropriate fix in the drydock api
|
||||||
|
# client by having the session detect a 401/403 response
|
||||||
|
# and refresh the token appropriately.
|
||||||
|
# Logs drydock client unauthorized error
|
||||||
|
keystone_token_expired = True
|
||||||
|
logging.error(unauthorized_error)
|
||||||
|
|
||||||
|
# Set up new drydock client with new keystone token
|
||||||
|
logging.info("Setting up new drydock session...")
|
||||||
|
|
||||||
|
context['svc_endpoint'] = ucp_service_endpoint(
|
||||||
|
self, svc_type='physicalprovisioner')
|
||||||
|
|
||||||
|
new_dd_client = self.drydock_session_client(context)
|
||||||
|
|
||||||
|
except errors.ClientForbiddenError as forbidden_error:
|
||||||
|
raise AirflowException(forbidden_error)
|
||||||
|
|
||||||
|
except errors.ClientError as client_error:
|
||||||
|
raise AirflowException(client_error)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
|
# There can be instances where there are intermittent network
|
||||||
|
# issues that prevents us from retrieving the task state. We
|
||||||
|
# will want to retry in such situations.
|
||||||
logging.info("Unable to retrieve task state. Retrying...")
|
logging.info("Unable to retrieve task state. Retrying...")
|
||||||
|
|
||||||
# Raise Time Out Exception
|
# Raise Time Out Exception
|
||||||
|
|
|
@ -14,7 +14,7 @@ deploy_node_task_timeout = 3600
|
||||||
prepare_node_query_interval = 30
|
prepare_node_query_interval = 30
|
||||||
prepare_node_task_timeout = 1800
|
prepare_node_task_timeout = 1800
|
||||||
prepare_site_query_interval = 10
|
prepare_site_query_interval = 10
|
||||||
prepare_site_task_timeout = 120
|
prepare_site_task_timeout = 300
|
||||||
service_type = physicalprovisioner
|
service_type = physicalprovisioner
|
||||||
verify_site_query_interval = 10
|
verify_site_query_interval = 10
|
||||||
verify_site_task_timeout = 60
|
verify_site_task_timeout = 60
|
||||||
|
|
|
@ -16,7 +16,7 @@ deploy_node_task_timeout = 3600
|
||||||
prepare_node_query_interval = 30
|
prepare_node_query_interval = 30
|
||||||
prepare_node_task_timeout = 1800
|
prepare_node_task_timeout = 1800
|
||||||
prepare_site_query_interval = 10
|
prepare_site_query_interval = 10
|
||||||
prepare_site_task_timeout = 120
|
prepare_site_task_timeout = 300
|
||||||
service_type = physicalprovisioner
|
service_type = physicalprovisioner
|
||||||
verify_site_query_interval = 10
|
verify_site_query_interval = 10
|
||||||
verify_site_task_timeout = 60
|
verify_site_task_timeout = 60
|
||||||
|
|
Loading…
Reference in New Issue