From 06d6747b50e293476079d798f8df92cef70d7811 Mon Sep 17 00:00:00 2001 From: Scott Hussey Date: Tue, 26 Jun 2018 14:41:24 -0500 Subject: [PATCH] Make client HTTP connections resilient - Make Keystone session use a timeout to prevent hangs - Support retries - Make the above configurable Change-Id: I7123bd2fdcd329eae5b8b40f09168a1d599fa0f7 --- drydock_provisioner/config.py | 19 +++++++++++ .../statemgmt/design/resolver.py | 29 ++++++++++++---- etc/drydock/drydock.conf.sample | 33 +++++++++++++++---- 3 files changed, 67 insertions(+), 14 deletions(-) diff --git a/drydock_provisioner/config.py b/drydock_provisioner/config.py index 3b167ff3..0466cfbc 100644 --- a/drydock_provisioner/config.py +++ b/drydock_provisioner/config.py @@ -103,6 +103,23 @@ class DrydockConfig(object): 'report_url', default='http://localhost:9000/api/v1.0/bootactions/') ] + + # Options for network traffic + network_options = [ + cfg.IntOpt( + 'http_client_connect_timeout', + default=16, + help='Timeout for initial read of outgoing HTTP calls from Drydock in seconds.'), + cfg.IntOpt( + 'http_client_read_timeout', + default=300, + help='Timeout for initial read of outgoing HTTP calls from Drydock in seconds.'), + cfg.IntOpt( + 'http_client_retries', + default=3, + help='Number of retries for transient errors of outgoing HTTP calls from Drydock.'), + ] + # Enabled plugins plugin_options = [ cfg.StrOpt( @@ -184,6 +201,7 @@ class DrydockConfig(object): DrydockConfig.bootactions_options, group='bootactions') self.conf.register_opts(DrydockConfig.logging_options, group='logging') self.conf.register_opts(DrydockConfig.plugin_options, group='plugins') + self.conf.register_opts(DrydockConfig.network_options, group='network') self.conf.register_opts( DrydockConfig.database_options, group='database') self.conf.register_opts( @@ -204,6 +222,7 @@ def list_opts(): 'plugins': DrydockConfig.plugin_options, 'timeouts': DrydockConfig.timeout_options, 'database': DrydockConfig.database_options, + 'network': 
DrydockConfig.network_options, } package_path = os.path.dirname(os.path.abspath(__file__)) diff --git a/drydock_provisioner/statemgmt/design/resolver.py b/drydock_provisioner/statemgmt/design/resolver.py index 0ab7cf6a..8eea6731 100644 --- a/drydock_provisioner/statemgmt/design/resolver.py +++ b/drydock_provisioner/statemgmt/design/resolver.py @@ -15,6 +15,7 @@ import urllib.parse import re +import time import logging import requests @@ -23,6 +24,7 @@ from beaker.util import parse_cache_config_options from drydock_provisioner import error as errors from drydock_provisioner.util import KeystoneUtils +from drydock_provisioner.config import config_mgr cache_opts = { 'cache.type': 'memory', @@ -30,6 +32,7 @@ cache_opts = { } cache = CacheManager(**parse_cache_config_options(cache_opts)) +LOG = logging.getLogger(__name__) class ReferenceResolver(object): """Class for handling different data references to resolve them data.""" @@ -54,8 +57,16 @@ class ReferenceResolver(object): "Invalid reference scheme %s: no handler." % design_uri.scheme) else: - # Have to do a little magic to call the classmethod as a pointer - return handler.__get__(None, cls)(design_uri) + tries = 0 + while tries < config_mgr.conf.network.http_client_retries: + try: + # Have to do a little magic to call the classmethod as a pointer + return handler.__get__(None, cls)(design_uri) + except Exception as ex: + tries = tries + 1 + if tries < config_mgr.conf.network.http_client_retries: + LOG.debug("Retrying reference after failure: %s" % str(ex)) + time.sleep(5 ** tries) except ValueError: raise errors.InvalidDesignReference( "Cannot resolve design reference %s: unable to parse as valid URI." 
@@ -74,9 +85,9 @@ class ReferenceResolver(object): response = requests.get( design_uri.geturl(), auth=(design_uri.username, design_uri.password), - timeout=30) + timeout=get_client_timeouts()) else: - response = requests.get(design_uri.geturl(), timeout=30) + response = requests.get(design_uri.geturl(), timeout=get_client_timeouts()) return response.content @@ -107,9 +118,8 @@ class ReferenceResolver(object): url = urllib.parse.urlunparse( (new_scheme, design_uri.netloc, design_uri.path, design_uri.params, design_uri.query, design_uri.fragment)) - logger = logging.getLogger(__name__) - logger.debug("Calling Keystone session for url %s" % str(url)) - resp = ks_sess.get(url) + LOG.debug("Calling Keystone session for url %s" % str(url)) + resp = ks_sess.get(url, timeout=get_client_timeouts()) if resp.status_code >= 400: raise errors.InvalidDesignReference( "Received error code for reference %s: %s - %s" % @@ -123,3 +133,8 @@ class ReferenceResolver(object): 'deckhand+http': resolve_reference_ucp, 'promenade+http': resolve_reference_ucp, } + +def get_client_timeouts(): + """Return a tuple of timeouts for the request library.""" + return (config_mgr.conf.network.http_client_connect_timeout, + config_mgr.conf.network.http_client_read_timeout) diff --git a/etc/drydock/drydock.conf.sample b/etc/drydock/drydock.conf.sample index 0ba497f2..a8fbb556 100644 --- a/etc/drydock/drydock.conf.sample +++ b/etc/drydock/drydock.conf.sample @@ -43,11 +43,11 @@ #domain_name = # Project ID to scope to (string value) -# Deprecated group/name - [keystone_authtoken]/tenant-id +# Deprecated group/name - [keystone_authtoken]/tenant_id #project_id = # Project name to scope to (string value) -# Deprecated group/name - [keystone_authtoken]/tenant-name +# Deprecated group/name - [keystone_authtoken]/tenant_name #project_name = # Domain ID containing project (string value) @@ -73,7 +73,7 @@ #user_id = # Username (string value) -# Deprecated group/name - [keystone_authtoken]/user-name +# Deprecated 
group/name - [keystone_authtoken]/user_name #username = # User's domain id (string value) @@ -159,7 +159,10 @@ # in the cache. If ENCRYPT, token data is encrypted and authenticated in the # cache. If the value is not one of these options or empty, auth_token will # raise an exception on initialization. (string value) -# Allowed values: None, MAC, ENCRYPT +# Possible values: +# None - +# MAC - +# ENCRYPT - #memcache_security_strategy = None # (Optional, mandatory if memcache_security_strategy is defined) This string is @@ -274,6 +277,25 @@ #poll_interval = 10 +[network] + +# +# From drydock_provisioner +# + +# Connect timeout for outgoing HTTP calls from Drydock in seconds. +# (integer value) +#http_client_connect_timeout = 16 + +# Timeout for initial read of outgoing HTTP calls from Drydock in seconds. +# (integer value) +#http_client_read_timeout = 300 + +# Number of retries for transient errors of outgoing HTTP calls from Drydock. +# (integer value) +#http_client_retries = 3 + + [oslo_policy] # @@ -281,11 +303,9 @@ # # The file that defines policies. (string value) -# Deprecated group/name - [DEFAULT]/policy_file #policy_file = policy.json # Default rule. Enforced when a requested rule is not found. (string value) -# Deprecated group/name - [DEFAULT]/policy_default_rule #policy_default_rule = default # Directories where policy configuration files are stored. They can be relative @@ -293,7 +313,6 @@ # absolute paths. The file defined by policy_file must exist for these # directories to be searched. Missing or empty directories are ignored. (multi # valued) -# Deprecated group/name - [DEFAULT]/policy_dirs #policy_dirs = policy.d