# Copyright 2017 AT&T Intellectual Property. All other rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Task driver for completing node provisioning with Canonical MaaS 2.2+."""
import time
import logging
import uuid
import re
import math
from oslo_config import cfg
import drydock_provisioner.error as errors
import drydock_provisioner.drivers as drivers
import drydock_provisioner.objects.fields as hd_fields
import drydock_provisioner.objects.task as task_model
import drydock_provisioner.objects.hostprofile as hostprofile
from drydock_provisioner.drivers.node import NodeDriver
from drydock_provisioner.drivers.node.maasdriver.api_client import MaasRequestFactory
import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
import drydock_provisioner.drivers.node.maasdriver.models.subnet as maas_subnet
import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
import drydock_provisioner.drivers.node.maasdriver.models.tag as maas_tag
import drydock_provisioner.drivers.node.maasdriver.models.sshkey as maas_keys
import drydock_provisioner.drivers.node.maasdriver.models.boot_resource as maas_boot_res
import drydock_provisioner.drivers.node.maasdriver.models.rack_controller as maas_rack
import drydock_provisioner.drivers.node.maasdriver.models.partition as maas_partition
import drydock_provisioner.drivers.node.maasdriver.models.volumegroup as maas_vg


class MaasNodeDriver(NodeDriver):
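    """Node driver for completing node provisioning via Canonical MaaS 2.2+."""
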
maasdriver_options = [
cfg.StrOpt(
'maas_api_key', help='The API key for accessing MaaS',
secret=True),
cfg.StrOpt('maas_api_url', help='The URL for accessing MaaS API'),
cfg.IntOpt(
'poll_interval',
default=10,
help='Polling interval for querying MaaS status in seconds'),
]
driver_name = 'maasdriver'
driver_key = 'maasdriver'
driver_desc = 'MaaS Node Provisioning Driver'

    def __init__(self, **kwargs):
super(MaasNodeDriver, self).__init__(**kwargs)
cfg.CONF.register_opts(
MaasNodeDriver.maasdriver_options, group=MaasNodeDriver.driver_key)
self.logger = logging.getLogger(
cfg.CONF.logging.nodedriver_logger_name)

    def execute_task(self, task_id):
task = self.state_manager.get_task(task_id)
if task is None:
raise errors.DriverError("Invalid task %s" % (task_id))
if task.action not in self.supported_actions:
raise errors.DriverError(
"Driver %s doesn't support task action %s" % (self.driver_desc,
task.action))
if task.action == hd_fields.OrchestratorAction.ValidateNodeServices:
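            # Validation touches the MaaS API directly: check connectivity
            # and authentication, verify boot resources have finished
            # importing, and confirm each rack controller's required
            # services are healthy before reporting success.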
result_detail = {
'detail': [],
'retry': False,
}
result = hd_fields.ActionResult.Success
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
maas_client = MaasRequestFactory(cfg.CONF.maasdriver.maas_api_url,
cfg.CONF.maasdriver.maas_api_key)
try:
if maas_client.test_connectivity():
self.logger.info("Able to connect to MaaS.")
result_detail['detail'].append("Able to connect to MaaS.")
if maas_client.test_authentication():
self.logger.info("Able to authenitcate with MaaS API.")
result_detail['detail'].append(
"Able to authenticate with MaaS API.")
boot_res = maas_boot_res.BootResources(maas_client)
boot_res.refresh()
if boot_res.is_importing():
self.logger.info(
"MaaS still importing boot resources.")
result_detail['detail'].append(
"MaaS still importing boot resources.")
result = hd_fields.ActionResult.Failure
else:
if boot_res.len() > 0:
self.logger.info(
"MaaS has synced boot resources.")
result_detail['detail'].append(
"MaaS has synced boot resources.")
else:
self.logger.info("MaaS has no boot resources.")
result_detail['detail'].append(
"MaaS has no boot resources.")
result = hd_fields.ActionResult.Failure
rack_ctlrs = maas_rack.RackControllers(maas_client)
rack_ctlrs.refresh()
if rack_ctlrs.len() == 0:
self.logger.info(
"No rack controllers registered in MaaS")
result_detail['detail'].append(
"No rack controllers registered in MaaS")
result = hd_fields.ActionResult.Failure
else:
for r in rack_ctlrs:
rack_svc = r.get_services()
rack_name = r.hostname
for s in rack_svc:
if s in maas_rack.RackController.REQUIRED_SERVICES:
self.logger.info(
"Service %s on rackd %s is %s" %
(s, rack_name, rack_svc[s]))
result_detail['detail'].append(
"Service %s on rackd %s is %s" %
(s, rack_name, rack_svc[s]))
if rack_svc[s] not in ("running",
"off"):
result = hd_fields.ActionResult.Failure
except errors.TransientDriverError as ex:
result_detail['retry'] = True
result_detail['detail'].append(str(ex))
result = hd_fields.ActionResult.Failure
except errors.PersistentDriverError as ex:
result_detail['detail'].append(str(ex))
result = hd_fields.ActionResult.Failure
except Exception as ex:
result_detail['detail'].append(str(ex))
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
return
design_id = getattr(task, 'design_id', None)
if design_id is None:
raise errors.DriverError("No design ID specified in task %s" %
(task_id))
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
if task.action == hd_fields.OrchestratorAction.CreateNetworkTemplate:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=task.action)
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to create network templates" %
(subtask.get_id()))
runner.start()
runner.join(timeout=cfg.CONF.timeouts.create_network_template * 60)
if runner.is_alive():
result = {
'retry': False,
'detail': 'MaaS Network creation timed-out'
}
self.logger.warning("Thread for task %s timed out after 120s" %
(subtask.get_id()))
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail=result)
else:
subtask = self.state_manager.get_task(subtask.get_id())
self.logger.info("Thread for task %s completed - result %s" %
(subtask.get_id(), subtask.get_result()))
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=subtask.get_result())
return
elif task.action == hd_fields.OrchestratorAction.ConfigureUserCredentials:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=task.action)
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to configure user credentials" %
(subtask.get_id()))
runner.start()
runner.join(
timeout=cfg.CONF.timeouts.configure_user_credentials * 60)
if runner.is_alive():
result = {
'retry': False,
'detail': 'MaaS ssh key creation timed-out'
}
self.logger.warning("Thread for task %s timed out after 120s" %
(subtask.get_id()))
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail=result)
else:
subtask = self.state_manager.get_task(subtask.get_id())
self.logger.info("Thread for task %s completed - result %s" %
(subtask.get_id(), subtask.get_result()))
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=subtask.get_result())
return
elif task.action == hd_fields.OrchestratorAction.IdentifyNode:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.IdentifyNode,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to identify node %s" %
(subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
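            # The runner threads record their results via the state manager;
            # poll that state below until every subtask completes or the
            # identify_node timeout elapses.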
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.identify_node * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to identify node complete - status %s" %
(subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task.action == hd_fields.OrchestratorAction.ConfigureHardware:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
self.logger.debug("Starting subtask to commissiong %s nodes." %
(len(task.node_list)))
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.ConfigureHardware,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to commission node %s" %
(subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.configure_hardware * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to commission node complete - status %s" %
(subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task.action == hd_fields.OrchestratorAction.ApplyNodeNetworking:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
self.logger.debug(
"Starting subtask to configure networking on %s nodes." %
(len(task.node_list)))
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.ApplyNodeNetworking,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to configure networking on node %s"
% (subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.apply_node_networking * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to apply networking complete - status %s"
% (subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
                time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task.action == hd_fields.OrchestratorAction.ApplyNodeStorage:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
self.logger.debug(
"Starting subtask to configure the storage on %s nodes." %
(len(task.node_list)))
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.ApplyNodeStorage,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to config node %s storage" %
(subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.apply_node_storage * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to configure node storage complete - status %s"
% (subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
                time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task.action == hd_fields.OrchestratorAction.ApplyNodePlatform:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
self.logger.debug(
"Starting subtask to configure the platform on %s nodes." %
(len(task.node_list)))
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.ApplyNodePlatform,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to config node %s platform" %
(subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.apply_node_platform * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to configure node platform complete - status %s"
% (subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
                time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task.action == hd_fields.OrchestratorAction.DeployNode:
self.orchestrator.task_field_update(
task.get_id(), status=hd_fields.TaskStatus.Running)
self.logger.debug("Starting subtask to deploy %s nodes." %
(len(task.node_list)))
subtasks = []
result_detail = {
'detail': [],
'failed_nodes': [],
'successful_nodes': [],
}
for n in task.node_list:
subtask = self.orchestrator.create_task(
task_model.DriverTask,
parent_task_id=task.get_id(),
design_id=design_id,
action=hd_fields.OrchestratorAction.DeployNode,
task_scope={'node_names': [n]})
runner = MaasTaskRunner(
state_manager=self.state_manager,
orchestrator=self.orchestrator,
task_id=subtask.get_id())
self.logger.info(
"Starting thread for task %s to deploy node %s" %
(subtask.get_id(), n))
runner.start()
subtasks.append(subtask.get_id())
cleaned_subtasks = []
attempts = 0
            max_attempts = cfg.CONF.timeouts.deploy_node * (
                60 // cfg.CONF.maasdriver.poll_interval)
            worked = failed = False

            self.logger.debug(
                "Polling for subtask completion every %d seconds, a max of %d polls."
                % (cfg.CONF.maasdriver.poll_interval, max_attempts))
while len(cleaned_subtasks) < len(
subtasks) and attempts < max_attempts:
for t in subtasks:
if t in cleaned_subtasks:
continue
subtask = self.state_manager.get_task(t)
if subtask.status == hd_fields.TaskStatus.Complete:
self.logger.info(
"Task %s to deploy node complete - status %s" %
(subtask.get_id(), subtask.get_result()))
cleaned_subtasks.append(t)
if subtask.result == hd_fields.ActionResult.Success:
result_detail['successful_nodes'].extend(
subtask.node_list)
worked = True
elif subtask.result == hd_fields.ActionResult.Failure:
result_detail['failed_nodes'].extend(
subtask.node_list)
failed = True
elif subtask.result == hd_fields.ActionResult.PartialSuccess:
worked = failed = True
                time.sleep(cfg.CONF.maasdriver.poll_interval)
attempts = attempts + 1
if len(cleaned_subtasks) < len(subtasks):
self.logger.warning(
"Time out for task %s before all subtask threads complete"
% (task.get_id()))
result = hd_fields.ActionResult.DependentFailure
result_detail['detail'].append(
'Some subtasks did not complete before the timeout threshold'
)
elif worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
else:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)


class MaasTaskRunner(drivers.DriverTaskRunner):
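    """Task runner executing a single MaaS driver task in its own thread."""
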
def __init__(self, **kwargs):
super(MaasTaskRunner, self).__init__(**kwargs)
# TODO(sh8121att): Need to build this name from configs
self.logger = logging.getLogger('drydock.nodedriver.maasdriver')

    def execute_task(self):
task_action = self.task.action
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Running,
result=hd_fields.ActionResult.Incomplete)
self.maas_client = MaasRequestFactory(cfg.CONF.maasdriver.maas_api_url,
cfg.CONF.maasdriver.maas_api_key)
site_design = self.orchestrator.get_effective_site(self.task.design_id)
if task_action == hd_fields.OrchestratorAction.CreateNetworkTemplate:
# Try to true up MaaS definitions of fabrics/vlans/subnets
# with the networks defined in Drydock
design_networks = site_design.networks
design_links = site_design.network_links
fabrics = maas_fabric.Fabrics(self.maas_client)
fabrics.refresh()
subnets = maas_subnet.Subnets(self.maas_client)
subnets.refresh()
result_detail = {'detail': []}
for l in design_links:
if l.metalabels is not None:
# TODO(sh8121att): move metalabels into config
if 'noconfig' in l.metalabels:
self.logger.info(
"NetworkLink %s marked 'noconfig', skipping configuration including allowed networks."
% (l.name))
continue
fabrics_found = set()
# First loop through the possible Networks on this NetworkLink
# and validate that MaaS's self-discovered networking matches
# our design. This means all self-discovered networks that are matched
# to a link need to all be part of the same fabric. Otherwise there is no
# way to reconcile the discovered topology with the designed topology
for net_name in l.allowed_networks:
n = site_design.get_network(net_name)
if n is None:
self.logger.warning(
"Network %s allowed on link %s, but not defined." %
(net_name, l.name))
continue
maas_net = subnets.singleton({'cidr': n.cidr})
if maas_net is not None:
fabrics_found.add(maas_net.fabric)
if len(fabrics_found) > 1:
self.logger.warning(
"MaaS self-discovered network incompatible with NetworkLink %s"
% l.name)
continue
elif len(fabrics_found) == 1:
link_fabric_id = fabrics_found.pop()
link_fabric = fabrics.select(link_fabric_id)
link_fabric.name = l.name
link_fabric.update()
else:
link_fabric = fabrics.singleton({'name': l.name})
if link_fabric is None:
link_fabric = maas_fabric.Fabric(
self.maas_client, name=l.name)
link_fabric = fabrics.add(link_fabric)
# Ensure that the MTU of the untagged VLAN on the fabric
# matches that on the NetworkLink config
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=link_fabric.resource_id)
vlan_list.refresh()
vlan = vlan_list.singleton({'vid': 0})
vlan.mtu = l.mtu
vlan.update()
# Now that we have the fabrics sorted out, check
# that VLAN tags and subnet attributes are correct
for net_name in l.allowed_networks:
n = site_design.get_network(net_name)
if n is None:
continue
try:
subnet = subnets.singleton({'cidr': n.cidr})
if subnet is None:
self.logger.info(
"Subnet for network %s not found, creating..."
% (n.name))
fabric_list = maas_fabric.Fabrics(self.maas_client)
fabric_list.refresh()
fabric = fabric_list.singleton({'name': l.name})
if fabric is not None:
vlan_list = maas_vlan.Vlans(
self.maas_client,
fabric_id=fabric.resource_id)
vlan_list.refresh()
vlan = vlan_list.singleton({
'vid':
n.vlan_id if n.vlan_id is not None else 0
})
if vlan is not None:
vlan.name = n.name
if getattr(n, 'mtu', None) is not None:
vlan.mtu = n.mtu
vlan.update()
result_detail['detail'].append(
"VLAN %s found for network %s, updated attributes"
% (vlan.resource_id, n.name))
else:
# Create a new VLAN in this fabric and assign subnet to it
vlan = maas_vlan.Vlan(
self.maas_client,
name=n.name,
vid=n.vlan_id,
mtu=getattr(n, 'mtu', None),
fabric_id=fabric.resource_id)
vlan = vlan_list.add(vlan)
result_detail['detail'].append(
"VLAN %s created for network %s" %
(vlan.resource_id, n.name))
# If subnet did not exist, create it here and attach it to the fabric/VLAN
subnet = maas_subnet.Subnet(
self.maas_client,
name=n.name,
cidr=n.cidr,
dns_servers=n.dns_servers,
fabric=fabric.resource_id,
vlan=vlan.resource_id,
gateway_ip=n.get_default_gateway())
subnet_list = maas_subnet.Subnets(
self.maas_client)
subnet = subnet_list.add(subnet)
self.logger.info(
"Created subnet %s for CIDR %s on VLAN %s"
% (subnet.resource_id, subnet.cidr,
subnet.vlan))
result_detail['detail'].append(
"Subnet %s created for network %s" %
(subnet.resource_id, n.name))
else:
self.logger.error(
"Fabric %s should be created, but cannot locate it."
% (l.name))
else:
subnet.name = n.name
subnet.dns_servers = n.dns_servers
result_detail['detail'].append(
"Subnet %s found for network %s, updated attributes"
% (subnet.resource_id, n.name))
self.logger.info(
"Updating existing MaaS subnet %s" %
(subnet.resource_id))
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=subnet.fabric)
vlan_list.refresh()
vlan = vlan_list.select(subnet.vlan)
if vlan is not None:
vlan.name = n.name
vlan.set_vid(n.vlan_id)
if getattr(n, 'mtu', None) is not None:
vlan.mtu = n.mtu
vlan.update()
result_detail['detail'].append(
"VLAN %s found for network %s, updated attributes"
% (vlan.resource_id, n.name))
else:
self.logger.error(
"MaaS subnet %s does not have a matching VLAN"
% (subnet.resource_id))
continue
                            # Set the subnet's default gateway from the
                            # design's routes
                            subnet.gateway_ip = n.get_default_gateway()
subnet.update()
dhcp_on = False
for r in n.ranges:
subnet.add_address_range(r)
if r.get('type', None) == 'dhcp':
dhcp_on = True
vlan_list = maas_vlan.Vlans(
self.maas_client, fabric_id=subnet.fabric)
vlan_list.refresh()
vlan = vlan_list.select(subnet.vlan)
if dhcp_on and not vlan.dhcp_on:
# check if design requires a dhcp relay and if the MaaS vlan already uses a dhcp_relay
self.logger.info(
"DHCP enabled for subnet %s, activating in MaaS"
% (subnet.name))
rack_ctlrs = maas_rack.RackControllers(
self.maas_client)
rack_ctlrs.refresh()
dhcp_config_set = False
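                                # Prefer a DHCP relay when the design names an
                                # upstream target; otherwise serve DHCP from
                                # the first rack controller with an interface
                                # on this VLAN.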
for r in rack_ctlrs:
if n.dhcp_relay_upstream_target is not None:
if r.interface_for_ip(
n.dhcp_relay_upstream_target):
iface = r.interface_for_ip(
n.dhcp_relay_upstream_target)
vlan.relay_vlan = iface.vlan
self.logger.debug(
"Relaying DHCP on vlan %s to vlan %s"
% (vlan.resource_id,
vlan.relay_vlan))
result_detail['detail'].append(
"Relaying DHCP on vlan %s to vlan %s"
% (vlan.resource_id,
vlan.relay_vlan))
vlan.update()
dhcp_config_set = True
break
else:
for i in r.interfaces:
if i.vlan == vlan.resource_id:
self.logger.debug(
"Rack controller %s has interface on vlan %s"
% (r.resource_id,
vlan.resource_id))
rackctl_id = r.resource_id
vlan.dhcp_on = True
vlan.primary_rack = rackctl_id
self.logger.debug(
"Enabling DHCP on VLAN %s managed by rack ctlr %s"
% (vlan.resource_id,
rackctl_id))
result_detail['detail'].append(
"Enabling DHCP on VLAN %s managed by rack ctlr %s"
% (vlan.resource_id,
rackctl_id))
vlan.update()
dhcp_config_set = True
break
if dhcp_config_set:
break
if not dhcp_config_set:
self.logger.error(
"Network %s requires DHCP, but could not locate a rack controller to serve it."
% (n.name))
result_detail['detail'].append(
"Network %s requires DHCP, but could not locate a rack controller to serve it."
% (n.name))
elif dhcp_on and vlan.dhcp_on:
self.logger.info(
"DHCP already enabled for subnet %s" %
(subnet.resource_id))
# TODO(sh8121att): sort out static route support as MaaS seems to require the destination
# network be defined in MaaS as well
                    except ValueError as vex:
                        raise errors.DriverError(
                            "Inconsistent data from MaaS: %s" % str(vex))
subnet_list = maas_subnet.Subnets(self.maas_client)
subnet_list.refresh()
action_result = hd_fields.ActionResult.Incomplete
success_rate = 0
            for n in design_networks:
                if n.metalabels is not None:
                    # TODO(sh8121att): move metalabels into config
                    if 'noconfig' in n.metalabels:
                        self.logger.info(
                            "Network %s marked 'noconfig', skipping validation."
                            % (n.name))
                        continue

                exists = subnet_list.query({'cidr': n.cidr})

                if len(exists) > 0:
                    subnet = exists[0]
                    if subnet.name == n.name:
                        success_rate = success_rate + 1
                    else:
                        success_rate = success_rate - 1
                else:
                    success_rate = success_rate - 1
if success_rate == len(design_networks):
action_result = hd_fields.ActionResult.Success
elif success_rate == -(len(design_networks)):
action_result = hd_fields.ActionResult.Failure
else:
action_result = hd_fields.ActionResult.PartialSuccess
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=action_result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.ConfigureUserCredentials:
try:
key_list = maas_keys.SshKeys(self.maas_client)
key_list.refresh()
            except Exception:
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS SshKeys API',
'retry': True
})
return
site_model = site_design.get_site()
result_detail = {'detail': []}
failed = worked = False
for k in getattr(site_model, 'authorized_keys', []):
try:
if len(key_list.query({'key': k.replace("\n", "")})) == 0:
new_key = maas_keys.SshKey(self.maas_client, key=k)
new_key = key_list.add(new_key)
self.logger.debug(
"Added SSH key %s to MaaS user profile. Will be installed on all deployed nodes."
% (k[:16]))
result_detail['detail'].append("Added SSH key %s" %
(k[:16]))
worked = True
else:
self.logger.debug(
"SSH key %s already exists in MaaS user profile." %
k[:16])
                        result_detail['detail'].append(
                            "SSH key %s already exists" % (k[:16]))
worked = True
except Exception as ex:
self.logger.warning(
"Error adding SSH key to MaaS user profile: %s" %
str(ex))
result_detail['detail'].append("Failed to add SSH key %s" %
(k[:16]))
failed = True
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif worked:
final_result = hd_fields.ActionResult.Success
else:
final_result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=final_result,
result_detail=result_detail)
return
elif task_action == hd_fields.OrchestratorAction.IdentifyNode:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
            except Exception:
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS Machines API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
try:
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(node)
if machine is not None:
worked = True
result_detail['detail'].append(
"Node %s identified in MaaS" % n)
else:
failed = True
result_detail['detail'].append(
"Node %s not found in MaaS" % n)
except Exception as ex:
failed = True
result_detail['detail'].append(
"Error identifying node %s: %s" % (n, str(ex)))
result = None
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
elif failed:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.ConfigureHardware:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
            except Exception:
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS Machines API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
# TODO(sh8121att): Better way of representing the node statuses than static strings
for n in nodes:
try:
self.logger.debug("Locating node %s for commissioning" %
(n))
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(
node, update_name=False)
if machine is not None:
if machine.status_name in ['New', 'Broken']:
self.logger.debug(
"Located node %s in MaaS, starting commissioning"
% (n))
machine.commission()
# Poll machine status
attempts = 0
max_attempts = cfg.CONF.timeouts.configure_hardware * (
60 // cfg.CONF.maasdriver.poll_interval)
while (
attempts < max_attempts and
(machine.status_name != 'Ready' and
not machine.status_name.startswith('Failed'))
):
attempts = attempts + 1
time.sleep(cfg.CONF.maasdriver.poll_interval)
try:
machine.refresh()
self.logger.debug(
"Polling node %s status attempt %d of %d: %s"
% (n, attempts, max_attempts,
machine.status_name))
                                except Exception:
self.logger.warning(
"Error updating node %s status during commissioning, will re-attempt."
% (n),
exc_info=True)
if machine.status_name == 'Ready':
self.logger.info("Node %s commissioned." % (n))
result_detail['detail'].append(
"Node %s commissioned" % (n))
worked = True
elif machine.status_name == 'Commissioning':
self.logger.info(
"Located node %s in MaaS, node already being commissioned. Skipping..."
% (n))
result_detail['detail'].append(
"Located node %s in MaaS, node already being commissioned. Skipping..."
% (n))
worked = True
elif machine.status_name == 'Ready':
self.logger.info(
"Located node %s in MaaS, node commissioned. Skipping..."
% (n))
result_detail['detail'].append(
"Located node %s in MaaS, node commissioned. Skipping..."
% (n))
worked = True
else:
self.logger.warning(
"Located node %s in MaaS, unknown status %s. Skipping..."
% (n, machine.status_name))
                            result_detail['detail'].append(
                                "Located node %s in MaaS, unknown status %s. Skipping..."
                                % (n, machine.status_name))
failed = True
else:
self.logger.warning("Node %s not found in MaaS" % n)
failed = True
result_detail['detail'].append(
"Node %s not found in MaaS" % n)
except Exception as ex:
failed = True
result_detail['detail'].append(
"Error commissioning node %s: %s" % (n, str(ex)))
result = None
if worked and failed:
result = hd_fields.ActionResult.PartialSuccess
elif worked:
result = hd_fields.ActionResult.Success
elif failed:
result = hd_fields.ActionResult.Failure
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.ApplyNodeNetworking:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
fabrics = maas_fabric.Fabrics(self.maas_client)
fabrics.refresh()
subnets = maas_subnet.Subnets(self.maas_client)
subnets.refresh()
except Exception as ex:
                self.logger.error(
                    "Error applying node networking, cannot access MaaS: %s" %
                    str(ex),
                    exc_info=True)
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
# TODO(sh8121att): Better way of representing the node statuses than static strings
for n in nodes:
try:
self.logger.debug(
"Locating node %s for network configuration" % (n))
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(
node, update_name=False)
if machine is not None:
if machine.status_name == 'Ready':
self.logger.debug(
"Located node %s in MaaS, starting interface configuration"
% (n))
machine.reset_network_config()
machine.refresh()
for i in node.interfaces:
nl = site_design.get_network_link(
i.network_link)
if nl.metalabels is not None:
if 'noconfig' in nl.metalabels:
self.logger.info(
"Interface %s connected to NetworkLink %s marked 'noconfig', skipping."
% (i.device_name, nl.name))
continue
fabric = fabrics.singleton({'name': nl.name})
if fabric is None:
self.logger.error(
"No fabric found for NetworkLink %s" %
(nl.name))
failed = True
continue
if nl.bonding_mode != hd_fields.NetworkLinkBondingMode.Disabled:
if len(i.get_hw_slaves()) > 1:
msg = "Building node %s interface %s as a bond." % (
n, i.device_name)
self.logger.debug(msg)
result_detail['detail'].append(msg)
hw_iface_list = i.get_hw_slaves()
iface = machine.interfaces.create_bond(
device_name=i.device_name,
parent_names=hw_iface_list,
mtu=nl.mtu,
fabric=fabric.resource_id,
mode=nl.bonding_mode,
monitor_interval=nl.
bonding_mon_rate,
downdelay=nl.bonding_down_delay,
updelay=nl.bonding_up_delay,
lacp_rate=nl.bonding_peer_rate,
hash_policy=nl.bonding_xmit_hash)
else:
msg = "Network link %s indicates bonding, interface %s has less than 2 slaves." % \
(nl.name, i.device_name)
self.logger.warning(msg)
result_detail['detail'].append(msg)
continue
else:
if len(i.get_hw_slaves()) > 1:
msg = "Network link %s disables bonding, interface %s has multiple slaves." % \
(nl.name, i.device_name)
self.logger.warning(msg)
result_detail['detail'].append(msg)
continue
elif len(i.get_hw_slaves()) == 0:
msg = "Interface %s has 0 slaves." % (
i.device_name)
self.logger.warning(msg)
result_detail['detail'].append(msg)
else:
msg = "Configuring interface %s on node %s" % (
i.device_name, n)
self.logger.debug(msg)
hw_iface = i.get_hw_slaves()[0]
# TODO(sh8121att): HardwareProfile device alias integration
iface = machine.get_network_interface(
hw_iface)
if iface is None:
self.logger.warning(
"Interface %s not found on node %s, skipping configuration"
% (i.device_name, machine.resource_id))
failed = True
continue
if iface.fabric_id == fabric.resource_id:
self.logger.debug(
"Interface %s already attached to fabric_id %s"
% (i.device_name, fabric.resource_id))
else:
self.logger.debug(
"Attaching node %s interface %s to fabric_id %s"
% (node.name, i.device_name,
fabric.resource_id))
iface.attach_fabric(
fabric_id=fabric.resource_id)
if iface.effective_mtu != nl.mtu:
self.logger.debug(
"Updating interface %s MTU to %s" %
(i.device_name, nl.mtu))
iface.set_mtu(nl.mtu)
for iface_net in getattr(i, 'networks', []):
dd_net = site_design.get_network(iface_net)
if dd_net is not None:
link_iface = None
if iface_net == getattr(
nl, 'native_network', None):
# If a node interface is attached to the native network for a link
# then the interface itself should be linked to network, not a VLAN
# tagged interface
self.logger.debug(
"Attaching node %s interface %s to untagged VLAN on fabric %s"
% (node.name, i.device_name,
fabric.resource_id))
link_iface = iface
else:
# For non-native networks, we create VLAN tagged interfaces as children
# of this interface
vlan_options = {
'vlan_tag': dd_net.vlan_id,
'parent_name': iface.name,
}
if dd_net.mtu is not None:
vlan_options[
'mtu'] = dd_net.mtu
self.logger.debug(
"Creating tagged interface for VLAN %s on system %s interface %s"
% (dd_net.vlan_id, node.name,
i.device_name))
link_iface = machine.interfaces.create_vlan(
**vlan_options)
link_options = {}
link_options[
'primary'] = True if iface_net == getattr(
node, 'primary_network',
None) else False
link_options[
'subnet_cidr'] = dd_net.cidr
found = False
for a in getattr(
node, 'addressing', []):
if a.network == iface_net:
link_options[
'ip_address'] = 'dhcp' if a.type == 'dhcp' else a.address
found = True
if not found:
                                            self.logger.warning(
                                                "No addresses assigned to network %s for node %s, link is L2 only."
                                                % (iface_net, node.name))
link_options['ip_address'] = None
self.logger.debug(
"Linking system %s interface %s to subnet %s"
% (node.name, i.device_name,
dd_net.cidr))
link_iface.link_subnet(**link_options)
worked = True
else:
failed = True
self.logger.error(
"Did not find a defined Network %s to attach to interface"
% iface_net)
elif machine.status_name == 'Broken':
                        self.logger.info(
                            "Located node %s in MaaS, status broken. Run "
                            "ConfigureHardware before configuring networking."
                            % (n))
result_detail['detail'].append(
"Located node %s in MaaS, status 'Broken'. Skipping..."
% (n))
failed = True
else:
self.logger.warning(
"Located node %s in MaaS, unknown status %s. Skipping..."
% (n, machine.status_name))
result_detail['detail'].append(
"Located node %s in MaaS, unknown status %s. Skipping..."
% (n, machine.status_name))
failed = True
else:
self.logger.warning("Node %s not found in MaaS" % n)
failed = True
result_detail['detail'].append(
"Node %s not found in MaaS" % n)
except Exception as ex:
failed = True
self.logger.error(
"Error configuring network for node %s: %s" %
(n, str(ex)))
result_detail['detail'].append(
"Error configuring network for node %s: %s" %
(n, str(ex)))
            if worked and failed:
                final_result = hd_fields.ActionResult.PartialSuccess
            elif failed:
                final_result = hd_fields.ActionResult.Failure
            else:
                final_result = hd_fields.ActionResult.Success
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=final_result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.ApplyNodePlatform:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
tag_list = maas_tag.Tags(self.maas_client)
tag_list.refresh()
except Exception as ex:
                self.logger.error(
                    "Error configuring node platform, cannot access MaaS: %s" %
                    str(ex),
                    exc_info=True)
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
try:
self.logger.debug(
"Locating node %s for platform configuration" % (n))
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(
node, update_name=False)
if machine is None:
self.logger.warning(
"Could not locate machine for node %s" % n)
result_detail['detail'].append(
"Could not locate machine for node %s" % n)
failed = True
continue
except Exception as ex1:
failed = True
self.logger.error(
"Error locating machine for node %s: %s" % (n,
str(ex1)))
result_detail['detail'].append(
"Error locating machine for node %s" % (n))
continue
try:
# Render the string of all kernel params for the node
kp_string = ""
for k, v in getattr(node, 'kernel_params', {}).items():
if v == 'True':
kp_string = kp_string + " %s" % (k)
else:
kp_string = kp_string + " %s=%s" % (k, v)
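                    # Hypothetical example: kernel_params of
                    # {'console': 'ttyS0,115200', 'quiet': 'True'} renders as
                    # " console=ttyS0,115200 quiet" - a value of the literal
                    # string 'True' emits a bare flag with no '=' assignment.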
if kp_string:
# Check if the node has an existing kernel params tag
node_kp_tag = tag_list.select("%s_kp" % (node.name))
self.logger.info(
"Configuring kernel parameters for node %s" %
(node.name))
if node_kp_tag is None:
self.logger.debug(
"Creating kernel_params tag for node %s: %s" %
(node.name, kp_string))
node_kp_tag = maas_tag.Tag(
self.maas_client,
name="%s_kp" % (node.name),
kernel_opts=kp_string)
node_kp_tag = tag_list.add(node_kp_tag)
node_kp_tag.apply_to_node(machine.resource_id)
else:
self.logger.debug(
"Updating tag %s for node %s: %s" %
(node_kp_tag.resource_id, node.name,
kp_string))
node_kp_tag.kernel_opts = kp_string
node_kp_tag.update()
self.logger.info(
"Applied kernel parameters to node %s" % n)
result_detail['detail'].append(
"Applied kernel parameters to node %s" %
(node.name))
worked = True
except Exception as ex2:
failed = True
result_detail['detail'].append(
"Error configuring kernel parameters for node %s" %
(n))
self.logger.error(
"Error configuring kernel parameters for node %s: %s" %
(n, str(ex2)))
continue
try:
if node.tags is not None and len(node.tags) > 0:
self.logger.info(
"Configuring static tags for node %s" %
(node.name))
for t in node.tags:
tag_list.refresh()
tag = tag_list.select(t)
if tag is None:
try:
self.logger.debug(
"Creating static tag %s" % t)
tag = maas_tag.Tag(
self.maas_client, name=t)
tag = tag_list.add(tag)
except errors.DriverError as dex:
tag_list.refresh()
tag = tag_list.select(t)
if tag is not None:
                                    self.logger.debug(
                                        "Tag %s was created concurrently, using the existing tag."
                                        % t)
else:
self.logger.error(
"Error creating tag %s." % t)
continue
self.logger.debug("Applying tag %s to node %s" %
(tag.resource_id,
machine.resource_id))
tag.apply_to_node(machine.resource_id)
self.logger.info("Applied static tags to node %s" %
(node.name))
result_detail['detail'].append(
"Applied static tags to node %s" % (node.name))
worked = True
except Exception as ex3:
failed = True
result_detail['detail'].append(
"Error configuring static tags for node %s" %
(node.name))
self.logger.error(
"Error configuring static tags for node %s: %s" %
(node.name, str(ex3)))
continue
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif failed:
final_result = hd_fields.ActionResult.Failure
else:
final_result = hd_fields.ActionResult.Success
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=final_result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.ApplyNodeStorage:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
except Exception as ex:
                self.logger.error(
                    "Error configuring node storage, cannot access MaaS: %s" %
                    str(ex),
                    exc_info=True)
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
try:
self.logger.debug(
"Locating node %s for storage configuration" % (n))
node = site_design.get_baremetal_node(n)
machine = machine_list.identify_baremetal_node(
node, update_name=False)
if machine is None:
self.logger.warning(
"Could not locate machine for node %s" % n)
result_detail['detail'].append(
"Could not locate machine for node %s" % n)
failed = True
continue
except Exception as ex1:
failed = True
self.logger.error(
"Error locating machine for node %s: %s" % (n,
str(ex1)))
result_detail['detail'].append(
"Error locating machine for node %s" % (n))
continue
try:
"""
1. Clear VGs
2. Clear partitions
3. Apply partitioning
4. Create VGs
5. Create logical volumes
"""
self.logger.debug(
"Clearing current storage layout on node %s." %
node.name)
machine.reset_storage_config()
(root_dev, root_block) = node.find_fs_block_device('/')
(boot_dev, boot_block) = node.find_fs_block_device('/boot')
storage_layout = dict()
if isinstance(root_block, hostprofile.HostPartition):
storage_layout['layout_type'] = 'flat'
storage_layout['root_device'] = root_dev.name
storage_layout['root_size'] = root_block.size
elif isinstance(root_block, hostprofile.HostVolume):
storage_layout['layout_type'] = 'lvm'
if len(root_dev.physical_devices) != 1:
msg = "Root LV in VG with multiple physical devices on node %s" % (
node.name)
self.logger.error(msg)
result_detail['detail'].append(msg)
failed = True
continue
storage_layout[
'root_device'] = root_dev.physical_devices[0]
storage_layout['root_lv_size'] = root_block.size
storage_layout['root_lv_name'] = root_block.name
storage_layout['root_vg_name'] = root_dev.name
if boot_block is not None:
storage_layout['boot_size'] = boot_block.size
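                    # Illustrative layouts (device and size values here are
                    # hypothetical; keys match those set above):
                    #   flat: {'layout_type': 'flat', 'root_device': 'sda',
                    #          'root_size': '30g', 'boot_size': '1g'}
                    #   lvm:  {'layout_type': 'lvm', 'root_device': 'sda',
                    #          'root_lv_size': '30g', 'root_lv_name': 'root',
                    #          'root_vg_name': 'vg00'}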
self.logger.debug(
"Setting node %s root storage layout: %s" %
(node.name, str(storage_layout)))
machine.set_storage_layout(**storage_layout)
vg_devs = {}
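                    # Maps volume group name -> {'b': [block device ids],
                    # 'p': [partition ids]}, collected below so each VG is
                    # created only after all of its members are known.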
for d in node.storage_devices:
maas_dev = machine.block_devices.singleton({
'name':
d.name
})
if maas_dev is None:
self.logger.warning("Dev %s not found on node %s" %
(d.name, node.name))
continue
if d.volume_group is not None:
self.logger.debug(
"Adding dev %s to volume group %s" %
(d.name, d.volume_group))
if d.volume_group not in vg_devs:
vg_devs[d.volume_group] = {'b': [], 'p': []}
vg_devs[d.volume_group]['b'].append(
maas_dev.resource_id)
continue
self.logger.debug("Partitioning dev %s on node %s" %
(d.name, node.name))
for p in d.partitions:
if p.is_sys():
self.logger.debug(
"Skipping manually configuring a system partition."
)
continue
maas_dev.refresh()
size = MaasTaskRunner.calculate_bytes(
size_str=p.size, context=maas_dev)
part = maas_partition.Partition(
self.maas_client,
size=size,
bootable=p.bootable)
if p.part_uuid is not None:
part.uuid = p.part_uuid
self.logger.debug(
"Creating partition %s on dev %s" % (p.name,
d.name))
part = maas_dev.create_partition(part)
if p.volume_group is not None:
self.logger.debug(
"Adding partition %s to volume group %s" %
(p.name, p.volume_group))
if p.volume_group not in vg_devs:
vg_devs[p.volume_group] = {
'b': [],
'p': []
}
vg_devs[p.volume_group]['p'].append(
part.resource_id)
if p.mountpoint is not None:
format_opts = {'fstype': p.fstype}
if p.fs_uuid is not None:
format_opts['uuid'] = str(p.fs_uuid)
if p.fs_label is not None:
format_opts['label'] = p.fs_label
self.logger.debug(
"Formatting partition %s as %s" %
(p.name, p.fstype))
part.format(**format_opts)
mount_opts = {
'mount_point': p.mountpoint,
'mount_options': p.mount_options,
}
self.logger.debug(
"Mounting partition %s on %s" %
(p.name, p.mountpoint))
part.mount(**mount_opts)
self.logger.debug(
"Finished configuring node %s partitions" % node.name)
for v in node.volume_groups:
if v.is_sys():
                        self.logger.debug(
                            "Skipping manually configuring system VG.")
continue
if v.name not in vg_devs:
self.logger.warning(
"No physical volumes defined for VG %s, skipping."
% (v.name))
continue
maas_volgroup = maas_vg.VolumeGroup(
self.maas_client, name=v.name)
if v.vg_uuid is not None:
maas_volgroup.uuid = v.vg_uuid
if len(vg_devs[v.name]['b']) > 0:
maas_volgroup.block_devices = ','.join(
[str(x) for x in vg_devs[v.name]['b']])
if len(vg_devs[v.name]['p']) > 0:
maas_volgroup.partitions = ','.join(
[str(x) for x in vg_devs[v.name]['p']])
self.logger.debug(
"Creating volume group %s on node %s" %
(v.name, node.name))
maas_volgroup = machine.volume_groups.add(
maas_volgroup)
maas_volgroup.refresh()
for lv in v.logical_volumes:
calc_size = MaasTaskRunner.calculate_bytes(
size_str=lv.size, context=maas_volgroup)
bd_id = maas_volgroup.create_lv(
name=lv.name,
uuid_str=lv.lv_uuid,
size=calc_size)
if lv.mountpoint is not None:
machine.refresh()
maas_lv = machine.block_devices.select(bd_id)
self.logger.debug(
"Formatting LV %s as filesystem on node %s."
% (lv.name, node.name))
maas_lv.format(
fstype=lv.fstype, uuid_str=lv.fs_uuid)
self.logger.debug(
"Mounting LV %s at %s on node %s." %
(lv.name, lv.mountpoint, node.name))
maas_lv.mount(
mount_point=lv.mountpoint,
mount_options=lv.mount_options)
                    worked = True
                except Exception as ex:
                    failed = True
                    self.logger.error(
                        "Error configuring storage for node %s: %s" %
                        (n, str(ex)))
                    result_detail['detail'].append(
                        "Error configuring storage for node %s: %s" %
                        (n, str(ex)))
                    continue
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif failed:
final_result = hd_fields.ActionResult.Failure
else:
final_result = hd_fields.ActionResult.Success
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=final_result,
result_detail=result_detail)
elif task_action == hd_fields.OrchestratorAction.DeployNode:
try:
machine_list = maas_machine.Machines(self.maas_client)
machine_list.refresh()
fabrics = maas_fabric.Fabrics(self.maas_client)
fabrics.refresh()
subnets = maas_subnet.Subnets(self.maas_client)
subnets.refresh()
except Exception as ex:
                self.logger.error(
                    "Error deploying node, cannot access MaaS: %s" % str(ex),
                    exc_info=True)
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=hd_fields.ActionResult.Failure,
result_detail={
'detail': 'Error accessing MaaS API',
'retry': True
})
return
nodes = self.task.node_list
result_detail = {'detail': []}
worked = failed = False
for n in nodes:
self.logger.info("Acquiring node %s for deployment" % (n))
try:
node = site_design.get_baremetal_node(n)
                    machine = machine_list.identify_baremetal_node(
                        node, update_name=False)
                    if machine is None:
                        self.logger.warning(
                            "Node %s not found in MaaS, skipping deployment."
                            % n)
                        failed = True
                        continue
                    if machine.status_name == 'Deployed':
self.logger.info(
"Node %s already deployed, skipping." % (n))
continue
elif machine.status_name == 'Ready':
machine = machine_list.acquire_node(n)
else:
self.logger.warning(
"Unexpected status %s for node %s, skipping deployment."
% (machine.status_name, n))
continue
except errors.DriverError as dex:
self.logger.warning(
"Error acquiring node %s, skipping" % n)
failed = True
continue
# Need to create bootdata keys for all the nodes being deployed
# TODO(sh8121att): this should be in the orchestrator
node = site_design.get_baremetal_node(n)
data_key = str(uuid.uuid4())
self.state_manager.set_bootdata_key(n, self.task.design_id,
data_key)
node.owner_data['bootdata_key'] = data_key
self.logger.debug("Configured bootdata for node %s" % (n))
# Set owner data in MaaS
try:
self.logger.info("Setting node %s owner data." % n)
for k, v in node.owner_data.items():
self.logger.debug(
"Set owner data %s = %s for node %s" % (k, v, n))
machine.set_owner_data(k, v)
except Exception as ex:
self.logger.warning(
"Error setting node %s owner data: %s" % (n, str(ex)))
failed = True
continue
self.logger.info("Deploying node %s" % (n))
try:
machine.deploy()
except errors.DriverError as dex:
self.logger.warning(
"Error deploying node %s, skipping" % n)
failed = True
continue
attempts = 0
max_attempts = cfg.CONF.timeouts.deploy_node * (
60 // cfg.CONF.maasdriver.poll_interval)
while (attempts < max_attempts
and (not machine.status_name.startswith('Deployed')
and not machine.status_name.startswith('Failed'))):
attempts = attempts + 1
time.sleep(cfg.CONF.maasdriver.poll_interval)
try:
machine.refresh()
self.logger.debug(
"Polling node %s status attempt %d of %d: %s" %
(n, attempts, max_attempts, machine.status_name))
                except Exception:
                    self.logger.warning(
                        "Error updating node %s status during deployment, will re-attempt."
                        % (n))
if machine.status_name.startswith('Deployed'):
result_detail['detail'].append("Node %s deployed" % (n))
self.logger.info("Node %s deployed" % (n))
worked = True
else:
result_detail['detail'].append(
"Node %s deployment timed out" % (n))
self.logger.warning("Node %s deployment timed out." % (n))
failed = True
if worked and failed:
final_result = hd_fields.ActionResult.PartialSuccess
elif failed:
final_result = hd_fields.ActionResult.Failure
else:
final_result = hd_fields.ActionResult.Success
self.orchestrator.task_field_update(
self.task.get_id(),
status=hd_fields.TaskStatus.Complete,
result=final_result,
result_detail=result_detail)

    @classmethod
def calculate_bytes(cls, size_str=None, context=None):
"""Calculate the size on bytes of a size_str.
Calculate the size as specified in size_str in the context of the provided
blockdev or vg. Valid size_str format below.
#m or #M or #mb or #MB = # * 1024 * 1024
#g or #G or #gb or #GB = # * 1024 * 1024 * 1024
#t or #T or #tb or #TB = # * 1024 * 1024 * 1024 * 1024
#% = Percentage of the total storage in the context
Prepend '>' to the above to note the size as a minimum and the calculated size being the
remaining storage available above the minimum
If the calculated size is not available in the context, a NotEnoughStorage exception is
raised.
:param size_str: A string representing the desired size
:param context: An instance of maasdriver.models.blockdev.BlockDevice or
instance of maasdriver.models.volumegroup.VolumeGroup. The
size_str is interpreted in the context of this device
:return size: The calculated size in bytes
"""
        pattern = r'(>?)(\d+)([mMbBgGtT%]{1,2})'
regex = re.compile(pattern)
match = regex.match(size_str)
if not match:
raise errors.InvalidSizeFormat(
"Invalid size string format: %s" % size_str)
if ((match.group(1) == '>' or match.group(3) == '%') and not context):
raise errors.InvalidSizeFormat(
'Sizes using the ">" or "%" format must specify a '
'block device or volume group context')
base_size = int(match.group(2))
        if match.group(3) in ['m', 'M', 'mb', 'MB']:
            computed_size = base_size * (1000 * 1000)
        elif match.group(3) in ['g', 'G', 'gb', 'GB']:
            computed_size = base_size * (1000 * 1000 * 1000)
        elif match.group(3) in ['t', 'T', 'tb', 'TB']:
            computed_size = base_size * (1000 * 1000 * 1000 * 1000)
        elif match.group(3) == '%':
            computed_size = math.floor((base_size / 100) * int(context.size))
        else:
            raise errors.InvalidSizeFormat(
                "Unknown size suffix in %s" % size_str)
        if context and computed_size > int(context.available_size):
            raise errors.NotEnoughStorage()
        if match.group(1) == '>':
            computed_size = int(context.available_size)
return computed_size


def list_opts():
return {MaasNodeDriver.driver_key: MaasNodeDriver.maasdriver_options}