DRYD-2 MVP - phase 1 - node enlistment

Add node driver task for IdentifyNode
Implement MaaS API interface for Machines and Interfaces
Scott Hussey 2017-06-12 15:15:17 -05:00
parent 01ac42ce9a
commit 02094040ec
14 changed files with 491 additions and 51 deletions

View File

@ -27,8 +27,8 @@ class DrydockConfig(object):
node_driver = {
'maasdriver': {
'api_key': 'KTMHgA42cNSMnfmJ82:cdg4yQUhp542aHsCTV:7Dc2KB9hQpWq3LfQAAAKAj6wdg22yWxZ',
'api_url': 'http://localhost:5240/MAAS/api/2.0/',
'api_key': 'UTBfxGL69XWjaffQek:NuKZSYGuBs6ZpYC6B9:byvXBgY8CsW5VQKxGdQjvJXtjXwr5G4U',
'api_url': 'http://10.23.19.16:30773/MAAS/api/2.0/',
},
}
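
A MaaS API key like the ones above is three colon-separated tokens (consumer key, token key, token secret) that the driver's request factory turns into OAuth 1.0a credentials. A minimal sketch of building an authenticated session from such a key, assuming requests-oauthlib and MaaS's usual PLAINTEXT signature with an empty consumer secret; the driver's own MaasRequestFactory may differ in detail:

```python
# Minimal sketch: build an authenticated requests session for the MaaS 2.0 API.
# Assumes requests-oauthlib; a MaaS API key is "<consumer>:<token>:<secret>" and
# MaaS expects OAuth 1.0a with a PLAINTEXT signature and an empty consumer secret.
import requests
from requests_oauthlib import OAuth1

def maas_session(api_key):
    consumer_key, token_key, token_secret = api_key.split(':')
    session = requests.Session()
    session.auth = OAuth1(client_key=consumer_key,
                          client_secret='',
                          resource_owner_key=token_key,
                          resource_owner_secret=token_secret,
                          signature_method='PLAINTEXT')
    return session

# e.g. maas_session(api_key).get(api_url + 'machines/')
```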

View File

@ -28,6 +28,7 @@ class NodeDriver(ProviderDriver):
hd_fields.OrchestratorAction.CreateStorageTemplate,
hd_fields.OrchestratorAction.CreateBootMedia,
hd_fields.OrchestratorAction.PrepareHardwareConfig,
hd_fields.OrchestratorAction.IdentifyNode,
hd_fields.OrchestratorAction.ConfigureHardware,
hd_fields.OrchestratorAction.InterrogateNode,
hd_fields.OrchestratorAction.ApplyNodeNetworking,

View File

@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
import drydock_provisioner.error as errors
import drydock_provisioner.config as config
import drydock_provisioner.drivers as drivers
@ -22,6 +25,7 @@ from .api_client import MaasRequestFactory
import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
class MaasNodeDriver(NodeDriver):
@ -34,6 +38,8 @@ class MaasNodeDriver(NodeDriver):
self.config = config.DrydockConfig.node_driver[self.driver_key]
self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
def execute_task(self, task_id):
task = self.state_manager.get_task(task_id)
@ -104,6 +110,8 @@ class MaasNodeDriver(NodeDriver):
site_design = self.orchestrator.get_effective_site(design_id)
if task.action == hd_fields.OrchestratorAction.CreateNetworkTemplate:
self.orchestrator.task_field_update(task.get_id(), status=hd_fields.TaskStatus.Running)
subtask = self.orchestrator.create_task(task_model.DriverTask,
parent_task_id=task.get_id(), design_id=design_id,
action=task.action, site_name=task.site_name,
@ -111,8 +119,13 @@ class MaasNodeDriver(NodeDriver):
runner = MaasTaskRunner(state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id(),config=self.config)
self.logger.info("Starting thread for task %s to create network templates" % (subtask.get_id()))
runner.start()
# TODO Figure out coherent system for putting all the timeouts in
# the config
runner.join(timeout=120)
if runner.is_alive():
@ -120,17 +133,85 @@ class MaasNodeDriver(NodeDriver):
'retry': False,
'detail': 'MaaS Network creation timed-out'
}
self.logger.warn("Thread for task %s timed out after 120s" % (subtask.get_id()))
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail=result)
else:
subtask = self.state_manager.get_task(subtask.get_id())
self.logger.info("Thread for task %s completed - result %s" % (subtask.get_id(), subtask.get_result()))
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=subtask.get_result())
return
elif task.action == hd_fields.OrchestratorAction.IdentifyNode:
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Running)
subtasks = []
result_detail = {
'detail': []
}
for n in task.node_list:
subtask = self.orchestrator.create_task(task_model.DriverTask,
parent_task_id=task.get_id(), design_id=design_id,
action=hd_fields.OrchestratorAction.IdentifyNode,
site_name=task.site_name,
task_scope={'site': task.site_name, 'node_names': [n]})
runner = MaasTaskRunner(state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id(),config=self.config)
self.logger.info("Starting thread for task %s to identify node %s" % (subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
running_subtasks = len(subtasks)
attempts = 0
worked = failed = False
#TODO Add timeout to config
while running_subtasks > 0 and attempts < 3:
for t in subtasks:
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info("Task %s to identify node %s complete - status %s" %
(subtask.get_id(), n, subtask.get_result()))
result_detail['detail'].extend(subtask.result_detail['detail'])
running_subtasks = running_subtasks - 1
if subtask.result in [hd_fields.ActionResult.Success,
hd_fields.ActionResult.PartialSuccess]:
worked = True
elif subtask.result in [hd_fields.ActionResult.Failure,
hd_fields.ActionResult.PartialSuccess]:
failed = True
time.sleep(1 * 60)
attempts = attempts + 1
if running_subtasks > 0:
self.logger.warn("Time out for task %s before all subtask threads complete" % (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append('Some subtasks did not complete before the timeout threshold')
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
class MaasTaskRunner(drivers.DriverTaskRunner):
@ -138,6 +219,7 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
super(MaasTaskRunner, self).__init__(**kwargs)
self.driver_config = config
self.logger = logging.getLogger('drydock.nodedriver.maasdriver')
def execute_task(self):
task_action = self.task.action
@ -314,4 +396,52 @@ class MaasTaskRunner(drivers.DriverTaskRunner):
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=action_result,
result_detail=result_detail)
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.IdentifyNode:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except:
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error accessing MaaS Machines API', 'retry': True})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
try:
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(node)
if machine is not None:
worked = True
result_detail['detail'].append("Node %s identified in MaaS" % n)
else:
failed = True
result_detail['detail'].append("Node %s not found in MaaS" % n)
except Exception as ex:
failed = True
result_detail['detail'].append("Error identifying node %s: %s" % (n, str(ex)))
result = None
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
elif failed:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
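
The IdentifyNode branch above fans the parent task out into one threaded MaasTaskRunner per node, then polls the state manager on a fixed interval and folds the subtask outcomes into Success, PartialSuccess, or Failure. A stripped-down sketch of that fan-out-and-poll pattern, with hypothetical run_subtask() and get_status() helpers standing in for the runner thread and the state manager lookups:

```python
# Sketch of the fan-out/poll aggregation behind the IdentifyNode subtasks.
# run_subtask() and get_status() are hypothetical stand-ins for starting a
# MaasTaskRunner thread and reading the subtask back from the state manager.
import threading
import time

def identify_nodes(node_names, run_subtask, get_status,
                   poll_interval=60, max_attempts=3):
    threads = {n: threading.Thread(target=run_subtask, args=(n,)) for n in node_names}
    for t in threads.values():
        t.start()

    pending = set(node_names)
    worked = failed = False
    attempts = 0
    while pending and attempts < max_attempts:
        time.sleep(poll_interval)
        attempts += 1
        for n in list(pending):
            status = get_status(n)      # 'success' | 'failure' | None while still running
            if status is None:
                continue
            pending.discard(n)
            worked = worked or status == 'success'
            failed = failed or status == 'failure'

    if pending:                         # timed out before every subtask finished
        failed = True
    if worked and failed:
        return 'partial_success'
    return 'success' if worked else 'failure'
```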

View File

@ -13,6 +13,7 @@
# limitations under the License.
import json
import re
import logging
import drydock_provisioner.error as errors
"""
@ -28,6 +29,7 @@ class ResourceBase(object):
def __init__(self, api_client, **kwargs):
self.api_client = api_client
self.logger = logging.getLogger('drydock.drivers.maasdriver')
for f in self.fields:
if f in kwargs.keys():
@ -143,13 +145,16 @@ class ResourceBase(object):
return i
"""
A collection of MaaS resources.
Rather than a simple list, we will key the collection on resource
ID for more efficient access.
"""
class ResourceCollectionBase(object):
"""
A collection of MaaS resources.
Rather than a simple list, we will key the collection on resource
ID for more efficient access.
:param api_client: An instance of api_client.MaasRequestFactory
"""
collection_url = ''
collection_resource = ResourceBase
@ -157,12 +162,13 @@ class ResourceCollectionBase(object):
def __init__(self, api_client):
self.api_client = api_client
self.resources = {}
self.logger = logging.getLogger('drydock.drivers.maasdriver')
"""
Parse URL for placeholders and replace them with current
instance values
"""
def interpolate_url(self):
"""
Parse URL for placeholders and replace them with current
instance values
"""
pattern = '\{([a-z_]+)\}'
regex = re.compile(pattern)
start = 0
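
interpolate_url() fills {placeholder} tokens in a resource_url or collection_url from attributes on the instance, which is how a template like nodes/{system_id}/interfaces/{resource_id}/ resolves to a concrete endpoint. A self-contained sketch of that substitution, assuming every placeholder names an attribute already set on the object:

```python
# Sketch of {placeholder} interpolation as used by the resource_url templates.
# Assumes every placeholder names an attribute already set on the instance.
import re

def interpolate_url(url_template, obj):
    pattern = re.compile(r'\{([a-z_]+)\}')
    return pattern.sub(lambda m: str(getattr(obj, m.group(1))), url_template)

class FakeInterface:
    system_id = 'abc123'
    resource_id = 7

# -> 'nodes/abc123/interfaces/7/'
print(interpolate_url('nodes/{system_id}/interfaces/{resource_id}/', FakeInterface()))
```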

View File

@ -0,0 +1,34 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
class Interface(model_base.ResourceBase):
resource_url = 'nodes/{system_id}/interfaces/{resource_id}/'
fields = ['resource_id', 'system_id', 'name', 'type', 'mac_address', 'vlan',
'links', 'effective_mtu']
json_fields = ['name', 'type', 'mac_address', 'vlan', 'links', 'effective_mtu']
def __init__(self, api_client, **kwargs):
super(Interface, self).__init__(api_client, **kwargs)
class Interfaces(model_base.ResourceCollectionBase):
collection_url = 'nodes/{system_id}/interfaces/'
collection_resource = Interface
def __init__(self, api_client, **kwargs):
super(Interfaces, self).__init__(api_client)
self.system_id = kwargs.get('system_id', None)
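
Because the collection URL itself carries {system_id}, an Interfaces collection is always scoped to a single machine. A hypothetical usage sketch, assuming MaasRequestFactory(api_url, api_key) as configured elsewhere in the driver and the refresh()/resources plumbing from ResourceCollectionBase; the URL, API key, and system_id are placeholders:

```python
# Hypothetical usage: list the interfaces MaaS reports for one machine.
# Assumes MaasRequestFactory(api_url, api_key) as configured elsewhere in the
# driver; the URL, API key, and system_id below are placeholders.
from drydock_provisioner.drivers.node.maasdriver.api_client import MaasRequestFactory
import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface

client = MaasRequestFactory('http://localhost:5240/MAAS/api/2.0/', 'consumer:token:secret')

ifaces = maas_interface.Interfaces(client, system_id='abc123')
ifaces.refresh()    # GET nodes/abc123/interfaces/

for iface in ifaces.resources.values():
    print(iface.name, iface.mac_address, iface.effective_mtu)
```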

View File

@ -0,0 +1,185 @@
# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
import drydock_provisioner.drivers.node.maasdriver.models.interface as maas_interface
import bson
import yaml
class Machine(model_base.ResourceBase):
resource_url = 'machines/{resource_id}/'
fields = ['resource_id', 'hostname', 'power_type', 'power_state', 'power_parameters', 'interfaces',
'boot_interface', 'memory', 'cpu_count', 'tag_names']
json_fields = ['hostname', 'power_type']
def __init__(self, api_client, **kwargs):
super(Machine, self).__init__(api_client, **kwargs)
# Replace generic dicts with interface collection model
if getattr(self, 'resource_id', None) is not None:
self.interfaces = maas_interface.Interfaces(api_client, system_id=self.resource_id)
self.interfaces.refresh()
def get_power_params(self):
url = self.interpolate_url()
resp = self.api_client.get(url, op='power_parameters')
if resp.status_code == 200:
self.power_parameters = resp.json()
def commission(self, debug=False):
url = self.interpolate_url()
# If we want to debug this node commissioning, enable SSH
# after commissioning and leave the node powered up
options = {'enable_ssh': '1' if debug else '0'}
resp = self.api_client.post(url, op='commission', files=options)
# Need to sort out how to handle exceptions
if not resp.ok:
raise Exception()
def get_details(self):
url = self.interpolate_url()
resp = self.api_client.get(url, op='details')
if resp.status_code == 200:
detail_config = bson.loads(resp.text)
return detail_config
def to_dict(self):
"""
Serialize this resource instance into a dict matching the
MAAS representation of the resource
"""
data_dict = {}
for f in self.json_fields:
if getattr(self, f, None) is not None:
if f == 'resource_id':
data_dict['system_id'] = getattr(self, f)
else:
data_dict[f] = getattr(self, f)
return data_dict
@classmethod
def from_dict(cls, api_client, obj_dict):
"""
Create an instance of this resource class based on a dict
of MaaS type attributes
Customized for Machine due to use of system_id instead of id
as resource key
:param api_client: Instance of api_client.MaasRequestFactory for accessing MaaS API
:param obj_dict: Python dict as parsed from MaaS API JSON representing this resource type
"""
refined_dict = {k: obj_dict.get(k, None) for k in cls.fields}
if 'system_id' in obj_dict.keys():
refined_dict['resource_id'] = obj_dict.get('system_id')
i = cls(api_client, **refined_dict)
return i
class Machines(model_base.ResourceCollectionBase):
collection_url = 'machines/'
collection_resource = Machine
def __init__(self, api_client, **kwargs):
super(Machines, self).__init__(api_client)
# Add the OOB power parameters to each machine instance
def collect_power_params(self):
for k, v in self.resources.items():
v.get_power_params()
def identify_baremetal_node(self, node_model, update_name=True):
"""
Search all the defined MaaS Machines and attempt to match
one against the provided Drydock BaremetalNode model. Update
the MaaS instance with the correct hostname
:param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource
:param update_name: Whether Drydock should update the MaaS resource name to match the Drydock design
"""
node_oob_network = node_model.oob_network
node_oob_ip = node_model.get_network_address(node_oob_network)
if node_oob_ip is None:
self.logger.warn("Node model missing OOB IP address")
raise ValueError('Node model missing OOB IP address')
try:
self.collect_power_params()
maas_node = self.singleton({'power_params.power_address': node_oob_ip})
self.logger.debug("Found MaaS resource %s matching Node %s" % (maas_node.resource_id, node_model.get_id()))
if maas_node.hostname != node_model.name and update_name:
maas_node.hostname = node_model.name
maas_node.update()
self.logger.debug("Updated MaaS resource %s hostname to %s" % (maas_node.resource_id, node_model.name))
return maas_node
except ValueError as ve:
self.logger.warn("Error locating matching MaaS resource for OOB IP %s" % (node_oob_ip))
return None
def query(self, query):
"""
Custom query method to deal with complex fields
"""
result = list(self.resources.values())
for (k, v) in query.items():
if k.startswith('power_params.'):
field = k[13:]
result = [i for i in result
if str(getattr(i,'power_parameters', {}).get(field, None)) == str(v)]
else:
result = [i for i in result
if str(getattr(i, k, None)) == str(v)]
return result
def add(self, res):
"""
Create a new resource in this collection in MaaS
Customized because Machine resources use 'system_id' instead of 'id'
"""
data_dict = res.to_dict()
url = self.interpolate_url()
resp = self.api_client.post(url, files=data_dict)
if resp.status_code == 200:
resp_json = resp.json()
res.set_resource_id(resp_json.get('system_id'))
return res
raise errors.DriverError("Failed updating MAAS url %s - return code %s"
% (url, resp.status_code))
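
identify_baremetal_node() ties a Drydock BaremetalNode to a MaaS machine by comparing the node's OOB IP against the power_address each machine reports in its power parameters, which query() exposes through the dotted power_params. prefix. A condensed sketch of that matching rule with the MaaS machines reduced to plain dicts; the sample records are hypothetical:

```python
# Sketch of the OOB-IP match behind identify_baremetal_node(), with MaaS
# machines reduced to plain dicts; the sample records are hypothetical.
def match_by_oob_ip(machines, oob_ip):
    """Return the single machine whose BMC power_address equals oob_ip."""
    hits = [m for m in machines
            if str(m.get('power_parameters', {}).get('power_address')) == str(oob_ip)]
    if len(hits) != 1:
        raise ValueError("Expected exactly one match, found %d" % len(hits))
    return hits[0]

machines = [
    {'system_id': '4y3h7n', 'hostname': 'maas-enlisted-1',
     'power_parameters': {'power_address': '172.16.1.101'}},
    {'system_id': '8kf2ma', 'hostname': 'maas-enlisted-2',
     'power_parameters': {'power_address': '172.16.1.102'}},
]

print(match_by_oob_ip(machines, '172.16.1.101')['system_id'])   # -> 4y3h7n
```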

View File

@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import logging
from pyghmi.ipmi.command import Command
@ -34,15 +35,19 @@ class PyghmiDriver(oob.OobDriver):
self.driver_key = "pyghmi_driver"
self.driver_desc = "Pyghmi OOB Driver"
self.logger = logging.getLogger('drydock.oobdriver.pyghmi')
self.config = config.DrydockConfig.node_driver.get(self.driver_key, {})
def execute_task(self, task_id):
task = self.state_manager.get_task(task_id)
if task is None:
self.logger.error("Invalid task %s" % (task_id))
raise errors.DriverError("Invalid task %s" % (task_id))
if task.action not in self.supported_actions:
self.logger.error("Driver %s doesn't support task action %s"
% (self.driver_desc, task.action))
raise errors.DriverError("Driver %s doesn't support task action %s"
% (self.driver_desc, task.action))
@ -66,7 +71,7 @@ class PyghmiDriver(oob.OobDriver):
result=hd_fields.ActionResult.Success)
return
site_design = self.orchestrator.get_effective_site(design_id, task.site_name)
site_design = self.orchestrator.get_effective_site(design_id)
target_nodes = []
@ -118,13 +123,6 @@ class PyghmiDriver(oob.OobDriver):
if x.get_result() in [hd_fields.ActionResult.PartialSuccess,
hd_fields.ActionResult.Failure]]
print("Task %s successful subtasks: %s" %
(task.get_id(), len(success_subtasks)))
print("Task %s unsuccessful subtasks: %s" %
(task.get_id(), len(nosuccess_subtasks)))
print("Task %s total subtasks: %s" %
(task.get_id(), len(task.get_subtasks())))
task_result = None
if len(success_subtasks) > 0 and len(nosuccess_subtasks) > 0:
task_result = hd_fields.ActionResult.PartialSuccess
@ -145,9 +143,11 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
def __init__(self, node=None, **kwargs):
super(PyghmiTaskRunner, self).__init__(**kwargs)
self.logger = logging.getLogger('drydock.oobdriver.pyghmi')
# We cheat here by providing the Node model instead
# of making the runner source it from statemgmt
if node is None:
self.logger.error("Did not specify target node")
raise errors.DriverError("Did not specify target node")
self.node = node
@ -172,7 +172,7 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
"task node scope")
ipmi_network = self.node.applied.get('oob_network')
ipmi_network = self.node.oob_network
ipmi_address = self.node.get_network_address(ipmi_network)
if ipmi_address is None:
@ -184,8 +184,8 @@ class PyghmiTaskRunner(drivers.DriverTaskRunner):
self.orchestrator.task_field_update(self.task.get_id(),
status=hd_fields.TaskStatus.Running)
ipmi_account = self.node.applied.get('oob_account', '')
ipmi_credential = self.node.applied.get('oob_credential', '')
ipmi_account = self.node.oob_account
ipmi_credential = self.node.oob_credential
ipmi_session = Command(bmc=ipmi_address, userid=ipmi_account,
password=ipmi_credential)
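
With oob_account and oob_credential read straight off the node model, the runner opens a pyghmi session against the node's OOB address. A minimal sketch of the IPMI calls the SetNodeBoot and PowerCycleNode actions rely on, assuming pyghmi's Command API; the BMC address and credentials are placeholders:

```python
# Minimal sketch of the IPMI calls behind SetNodeBoot and PowerCycleNode,
# assuming pyghmi's Command API; the BMC address and credentials are placeholders.
from pyghmi.ipmi.command import Command

ipmi_session = Command(bmc='172.16.1.101', userid='admin', password='password')

ipmi_session.set_bootdev('network')        # force PXE on the next boot
ipmi_session.set_power('off', wait=True)   # power the node down...
ipmi_session.set_power('on', wait=True)    # ...and back up so MaaS can enlist it
print(ipmi_session.get_power())            # e.g. {'powerstate': 'on'}
```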

View File

@ -32,6 +32,7 @@ and storage.
* CreateStorageTemplate - Configure site-wide storage information in bootstrapper
* CreateBootMedia - Ensure all needed boot media is available to the bootstrapper including external repositories
* PrepareHardwareConfig - Prepare the bootstrapper to handle all hardware configuration actions (firmware updates, RAID configuration, driver installation)
* IdentifyNode - Correlate a node definition in the Drydock internal model with a node detected by the downstream node bootstrapper.
* ConfigureHardware - Update and validate all hardware configurations on a node prior to deploying the OS on it
* InterrogateNode - Interrogate the bootstrapper about node information. Depending on the current state of the node, this interrogation will produce different information.
* ApplyNodeNetworking - Configure networking for a node

View File

@ -28,7 +28,7 @@ def start_drydock():
logger.setLevel(config.DrydockConfig.global_config.get('log_level'))
ch = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(filename)s:%(funcName)s - %(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

View File

@ -83,7 +83,11 @@ class Ingester(object):
self.logger.debug("Ingester:ingest_data ingesting design parts for design %s" % design_id)
if plugin_name in self.registered_plugins:
design_items = self.registered_plugins[plugin_name].ingest_data(**kwargs)
try:
design_items = self.registered_plugins[plugin_name].ingest_data(**kwargs)
except ValueError as vex:
self.logger.warn("Ingester:ingest_data - Error process data - %s" % (str(vex)))
return None
self.logger.debug("Ingester:ingest_data parsed %s design parts" % str(len(design_items)))
for m in design_items:
if context is not None:

View File

@ -70,6 +70,8 @@ class YamlIngester(IngesterPlugin):
def parse_docs(self, yaml_string):
models = []
self.logger.debug("yamlingester:parse_docs - Parsing YAML string \n%s" % (yaml_string))
try:
parsed_data = yaml.load_all(yaml_string)
except yaml.YAMLError as err:
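
parse_docs() hands the raw string to yaml.load_all so one file can carry several Drydock documents separated by ---. A small sketch of that multi-document parse with the same YAMLError guard, using safe_load_all here and hypothetical document bodies:

```python
# Sketch of multi-document YAML parsing as done in parse_docs(), using
# safe_load_all here; the document bodies are hypothetical examples.
import yaml

yaml_string = """\
apiVersion: 'v1.0'
kind: Region
---
apiVersion: 'v1.0'
kind: BaremetalNode
"""

try:
    parsed_data = list(yaml.safe_load_all(yaml_string))
except yaml.YAMLError as err:
    raise ValueError("Error parsing YAML: %s" % err)

print([d.get('kind') for d in parsed_data])   # -> ['Region', 'BaremetalNode']
```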

View File

@ -44,6 +44,7 @@ class OrchestratorAction(BaseDrydockEnum):
CreateStorageTemplate = 'create_storage_template'
CreateBootMedia = 'create_boot_media'
PrepareHardwareConfig = 'prepare_hardware_config'
IdentifyNode = 'identify_node'
ConfigureHardware = 'configure_hardware'
InterrogateNode = 'interrogate_node'
ApplyNodeNetworking = 'apply_node_networking'

View File

@ -16,6 +16,7 @@ import uuid
import time
import threading
import importlib
import logging
from copy import deepcopy
@ -33,6 +34,8 @@ class Orchestrator(object):
self.state_manager = state_manager
self.logger = logging.getLogger('drydock.orchestrator')
if enabled_drivers is not None:
oob_driver_name = enabled_drivers.get('oob', None)
if oob_driver_name is not None:
@ -155,10 +158,14 @@ class Orchestrator(object):
task_scope=task_scope,
action=hd_fields.OrchestratorAction.CreateNetworkTemplate)
self.logger.info("Starting node driver task %s to create network templates" % (driver_task.get_id()))
driver.execute_task(driver_task.get_id())
driver_task = self.state_manager.get_task(driver_task.get_id())
self.logger.info("Node driver task %s complete" % (driver_task.get_id()))
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=driver_task.get_result())
@ -167,12 +174,13 @@ class Orchestrator(object):
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Running)
driver = self.enabled_drivers['oob']
oob_driver = self.enabled_drivers['oob']
if driver is None:
if oob_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure)
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No oob driver configured', 'retry': False})
return
site_design = self.get_effective_site(design_id)
@ -186,30 +194,42 @@ class Orchestrator(object):
task_scope = {'site' : task_site,
'node_names' : target_names}
driver_task = self.create_task(tasks.DriverTask,
oob_driver_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.InterrogateNode,
action=hd_fields.OrchestratorAction.InterrogateOob,
task_scope=task_scope)
driver.execute_task(driver_task.get_id())
oob_driver.execute_task(oob_driver_task.get_id())
driver_task = self.state_manager.get_task(driver_task.get_id())
oob_driver_task = self.state_manager.get_task(oob_driver_task.get_id())
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=driver_task.get_result())
result=oob_driver_task.get_result())
return
elif task.action == hd_fields.OrchestratorAction.PrepareNode:
failed = worked = False
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Running)
driver = self.enabled_drivers['oob']
oob_driver = self.enabled_drivers['oob']
if driver is None:
if oob_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure)
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No oob driver configured', 'retry': False})
return
node_driver = self.enabled_drivers['node']
if node_driver is None:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Errored,
result=hd_fields.ActionResult.Failure,
result_detail={'detail': 'Error: No node driver configured', 'retry': False})
return
site_design = self.get_effective_site(design_id)
@ -229,33 +249,88 @@ class Orchestrator(object):
action=hd_fields.OrchestratorAction.SetNodeBoot,
task_scope=task_scope)
driver.execute_task(setboot_task.get_id())
self.logger.info("Starting OOB driver task %s to set PXE boot" % (setboot_task.get_id()))
oob_driver.execute_task(setboot_task.get_id())
self.logger.info("OOB driver task %s complete" % (setboot_task.get_id()))
setboot_task = self.state_manager.get_task(setboot_task.get_id())
if setboot_task.get_result() == hd_fields.ActionResult.Success:
worked = True
elif setboot_task.get_result() == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
elif setboot_task.get_result() == hd_fields.ActionResult.Failure:
failed = True
cycle_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.PowerCycleNode,
task_scope=task_scope)
driver.execute_task(cycle_task.get_id())
self.logger.info("Starting OOB driver task %s to power cycle nodes" % (cycle_task.get_id()))
oob_driver.execute_task(cycle_task.get_id())
self.logger.info("OOB driver task %s complete" % (cycle_task.get_id()))
cycle_task = self.state_manager.get_task(cycle_task.get_id())
if (setboot_task.get_result() == hd_fields.ActionResult.Success and
cycle_task.get_result() == hd_fields.ActionResult.Success):
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Success)
elif (setboot_task.get_result() == hd_fields.ActionResult.Success or
cycle_task.get_result() == hd_fields.ActionResult.Success):
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.PartialSuccess)
if cycle_task.get_result() == hd_fields.ActionResult.Success:
worked = True
elif cycle_task.get_result() == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
elif cycle_task.get_result() == hd_fields.ActionResult.Failure:
failed = True
# IdentifyNode success will take some time after PowerCycleNode finishes
# Retry the operation a few times if it fails before considering it a final failure
# Each attempt is a new task which might make the final task tree a bit confusing
node_identify_attempts = 0
while True:
node_identify_task = self.create_task(tasks.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.IdentifyNode,
task_scope=task_scope)
self.logger.info("Starting node driver task %s to identify node - attempt %s" %
(node_identify_task.get_id(), node_identify_attempts+1))
node_driver.execute_task(node_identify_task.get_id())
node_identify_attempts = node_identify_attempts + 1
node_identify_task = self.state_manager.get_task(node_identify_task.get_id())
if node_identify_task.get_result() == hd_fields.ActionResult.Success:
worked = True
break
elif node_identify_task.get_result() in [hd_fields.ActionResult.PartialSuccess,
hd_fields.ActionResult.Failure]:
# TODO This threshold should be a configurable default and tunable by task API
if node_identify_attempts > 2:
failed = True
break
time.sleep(5 * 60)
final_result = None
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif worked:
final_result = hd_fields.ActionResult.Success
else:
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure)
final_result = hd_fields.ActionResult.Failure
self.task_field_update(task_id,
status=hd_fields.TaskStatus.Complete,
result=final_result)
return
else:
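
PrepareNode now chains three driver tasks: set PXE boot over OOB, power cycle, then IdentifyNode, and because enlistment only shows up in MaaS some time after the power cycle, the identify step is retried on a fixed delay before being counted as a failure. A compact sketch of that bounded retry, with execute_identify() as a hypothetical stand-in for creating the driver task and reading back its result:

```python
# Sketch of the bounded IdentifyNode retry used by PrepareNode. execute_identify()
# is a hypothetical stand-in for creating the driver task and reading its result.
import time

def identify_with_retries(execute_identify, max_attempts=3, delay=5 * 60):
    for attempt in range(1, max_attempts + 1):
        result = execute_identify()    # 'success' | 'partial_success' | 'failure'
        if result == 'success':
            return True
        if attempt < max_attempts:
            # Nodes may still be PXE booting into enlistment; wait and retry.
            time.sleep(delay)
    return False
```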

View File

@ -62,6 +62,7 @@ setup(name='drydock_provisioner',
'requests',
'oauthlib',
'uwsgi>1.4',
'bson===0.4.7'
]
)
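
The pinned standalone bson package backs Machine.get_details(), since the MaaS details operation returns BSON rather than JSON. A short sketch assuming the 0.4.x loads()/dumps() API; the payload is a made-up stand-in for the response body:

```python
# Sketch: decode a MaaS 'details' response body with the standalone bson package.
# Assumes the 0.4.x loads()/dumps() API; the payload is a made-up stand-in.
import bson

payload = bson.dumps({'lshw': '<node/>', 'lldp': ''})
detail_config = bson.loads(payload)
print(sorted(detail_config.keys()))   # -> ['lldp', 'lshw']
```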