author     Scott Hussey <sh8121@att.com>        2018-10-01 20:37:11 -0500
committer  Scott Hussey <sh8121@att.com>        2018-10-18 17:04:00 -0500
commit     357cf7e45547c9d6a59ce6240de1a196b7b6e59f (patch)
tree       3e825be7b5bf48ddfa22beb863c38fc2fd1149f6
parent     9625febab6807f72d1ba0b44f9311dcff658389e (diff)

Support multiple rack controllers

- Update the maasdriver to support two concurrent rack controllers when
  configuring networking.
- Identify a baremetal node from both the MAAS node and rack controller
  collections.
- Relax validation to only require at least one healthy rack controller.

Change-Id: I04beb56a8212b65061840021b13b412fbb37ae81

Notes (review):
    Code-Review+2: Mark Burnett <mark.m.burnett@gmail.com>
    Code-Review+2: Aaron Sheffield <ajs@sheffieldfamily.net>
    Workflow+1: Aaron Sheffield <ajs@sheffieldfamily.net>
    Code-Review+1: Serge Kovaleff <sk607s@att.com>
    Verified+2: Zuul
    Submitted-by: Zuul
    Submitted-at: Tue, 23 Oct 2018 18:52:25 +0000
    Reviewed-on: https://review.openstack.org/607086
    Project: openstack/airship-drydock
    Branch: refs/heads/master
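The crux of the change is that a MAAS VLAN can now carry both a primary and a secondary DHCP rack controller instead of a single hard-wired primary. A minimal sketch of the new Vlan model calls introduced in models/vlan.py follows; api_client and the rack IDs are placeholders, and the new unit test at the end of this change exercises the same sequence:

    # Illustrative sketch only; mirrors the behavior added to models/vlan.py below.
    from drydock_provisioner.drivers.node.maasdriver.models.vlan import Vlan
    from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict

    api_client = None  # placeholder; the driver passes an authenticated MaasRequestFactory session
    vlan = Vlan(api_client, name='pxe', dhcp_on=False, mtu=1500)

    vlan.reset_dhcp_mgmt()               # clear relay_vlan, dhcp_on and both rack assignments
    vlan.dhcp_on = True
    vlan.add_rack_controller('rack-a')   # fills primary_rack first
    vlan.add_rack_controller('rack-b')   # then secondary_rack
    try:
        vlan.add_rack_controller('rack-c')
    except RackControllerConflict:
        pass                             # only two rack controllers may manage DHCP on a VLAN
    # CreateNetworkTemplate then calls vlan.update() to push the settings to MAAS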
-rw-r--r--  docs/source/images/architecture.png                                           bin 21469 -> 21469 bytes
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/actions/node.py            269
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/api_client.py                5
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/errors.py                   26
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/models/base.py               2
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/models/interface.py         12
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/models/machine.py           86
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py   25
-rw-r--r--  python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py              36
-rw-r--r--  python/drydock_provisioner/objects/node.py                                      4
-rw-r--r--  python/tests/unit/test_maasdriver_vlan.py                                      49
11 files changed, 394 insertions, 120 deletions
diff --git a/docs/source/images/architecture.png b/docs/source/images/architecture.png
index 00a6f93..04dc208 100644
--- a/docs/source/images/architecture.png
+++ b/docs/source/images/architecture.png
Binary files differ
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py b/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py
index 732aec9..d261de5 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py
@@ -29,6 +29,8 @@ import drydock_provisioner.objects as objects
 
 from drydock_provisioner.control.util import get_internal_api_href
 from drydock_provisioner.orchestrator.actions.orchestrator import BaseAction
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
+from drydock_provisioner.drivers.node.maasdriver.errors import ApiNotAvailable
 
 import drydock_provisioner.drivers.node.maasdriver.models.fabric as maas_fabric
 import drydock_provisioner.drivers.node.maasdriver.models.vlan as maas_vlan
@@ -138,25 +140,28 @@ class ValidateNodeServices(BaseMaasAction):
                     ctx_type='NA')
                 self.task.failure()
             else:
+                healthy_rackd = []
                 for r in rack_ctlrs:
-                    rack_svc = r.get_services()
-                    rack_name = r.hostname
-
-                    for s in rack_svc:
-                        if s in maas_rack.RackController.REQUIRED_SERVICES:
-                            is_error = False
-                            if rack_svc[s] not in ("running", "off"):
-                                self.task.failure()
-                                is_error = True
-                            self.logger.info(
-                                "Service %s on rackd %s is %s" %
-                                (s, rack_name, rack_svc[s]))
-                            self.task.add_status_msg(
-                                msg="Service %s on rackd %s is %s" %
-                                (s, rack_name, rack_svc[s]),
-                                error=is_error,
-                                ctx=rack_name,
-                                ctx_type='rack_ctlr')
+                    if r.is_healthy():
+                        healthy_rackd.append(r.hostname)
+                    else:
+                        msg = "Rack controller %s not healthy." % r.hostname
+                        self.logger.info(msg)
+                        self.task.add_status_msg(
+                            msg=msg,
+                            error=True,
+                            ctx=r.hostname,
+                            ctx_type='rack_ctlr')
+                if not healthy_rackd:
+                    msg = "No healthy rack controllers found."
+                    self.logger.info(msg)
+                    self.task.add_status_msg(
+                        msg=msg,
+                        error=True,
+                        ctx='maas',
+                        ctx_type='cluster')
+                    self.task.failure()
+
         except errors.TransientDriverError as ex:
             self.task.add_status_msg(
                 msg=str(ex), error=True, ctx='NA', ctx_type='NA', retry=True)
@@ -278,8 +283,7 @@ class DestroyNode(BaseMaasAction):
                         site_design)
             for n in nodes:
                 try:
-                    machine = machine_list.identify_baremetal_node(
-                        n, update_name=False)
+                    machine = find_node_in_maas(self.maas_client, n)
 
                     if machine is None:
                         msg = "Could not locate machine for node {}".format(n.name)
@@ -288,6 +292,13 @@
                             msg=msg, error=False, ctx=n.name, ctx_type='node')
                         self.task.success(focus=n.get_id())
                         continue
+                    elif type(machine) == maas_rack.RackController:
+                        msg = "Cannot delete rack controller {}.".format(n.name)
+                        self.logger.info(msg)
+                        self.task.add_status_msg(
+                            msg=msg, error=False, ctx=n.name, ctx_type='node')
+                        self.task.failure(focus=n.get_id())
+                        continue
 
                     # First release the node and erase its disks, if MaaS API allows
                     if machine.status_name in self.actionable_node_statuses:
@@ -687,7 +698,7 @@ class CreateNetworkTemplate(BaseMaasAction):
                     vlan_list.refresh()
                     vlan = vlan_list.select(subnet.vlan)
 
-                    if dhcp_on and not vlan.dhcp_on:
+                    if dhcp_on:
                         # check if design requires a dhcp relay and if the MaaS vlan already uses a dhcp_relay
                         msg = "DHCP enabled for subnet %s, activating in MaaS" % (
                             subnet.name)
@@ -702,12 +713,25 @@
                             self.maas_client)
                         rack_ctlrs.refresh()
 
+                        # Reset DHCP stuff to avoid offline rack controllers
+
+                        vlan.reset_dhcp_mgmt()
                         dhcp_config_set = False
 
                         for r in rack_ctlrs:
                             if n.dhcp_relay_upstream_target is not None:
                                 if r.interface_for_ip(
                                         n.dhcp_relay_upstream_target):
+                                    if not r.is_healthy():
+                                        msg = ("Rack controller %s with DHCP relay is not healthy." %
+                                               r.hostname)
+                                        self.logger.info(msg)
+                                        self.task.add_status_msg(
+                                            msg=msg,
+                                            error=True,
+                                            ctx=n.name,
+                                            ctx_type='network')
+                                        break
                                     iface = r.interface_for_ip(
                                         n.dhcp_relay_upstream_target)
                                     vlan.relay_vlan = iface.vlan
@@ -730,21 +754,42 @@ class CreateNetworkTemplate(BaseMaasAction):
                                 self.logger.debug(msg)
                                 rackctl_id = r.resource_id
 
-                                vlan.dhcp_on = True
-                                vlan.primary_rack = rackctl_id
-                                msg = "Enabling DHCP on VLAN %s managed by rack ctlr %s" % (
-                                    vlan.resource_id, rackctl_id)
-                                self.logger.debug(msg)
-                                self.task.add_status_msg(
-                                    msg=msg,
-                                    error=False,
-                                    ctx=n.name,
-                                    ctx_type='network')
-                                vlan.update()
-                                dhcp_config_set = True
+                                if not r.is_healthy():
+                                    msg = ("Rack controller %s not healthy, skipping DHCP config." %
+                                           r.resource_id)
+                                    self.logger.info(msg)
+                                    self.task.add_status_msg(
+                                        msg=msg,
+                                        error=True,
+                                        ctx=n.name,
+                                        ctx_type='network')
+                                    break
+                                try:
+                                    vlan.dhcp_on = True
+                                    vlan.add_rack_controller(
+                                        rackctl_id)
+                                    msg = "Enabling DHCP on VLAN %s managed by rack ctlr %s" % (
+                                        vlan.resource_id, rackctl_id)
+                                    self.logger.debug(msg)
+                                    self.task.add_status_msg(
+                                        msg=msg,
+                                        error=False,
+                                        ctx=n.name,
+                                        ctx_type='network')
+                                    vlan.update()
+                                    dhcp_config_set = True
+                                except RackControllerConflict as rack_ex:
+                                    msg = (
+                                        "More than two rack controllers on vlan %s, "
+                                        "skipping enabling %s." %
+                                        (vlan.resource_id, rackctl_id))
+                                    self.logger.debug(msg)
+                                    self.task.add_status_msg(
+                                        msg=msg,
+                                        error=False,
+                                        ctx=n.name,
+                                        ctx_type='network')
                                 break
-                            if dhcp_config_set:
-                                break
 
                         if not dhcp_config_set:
                             msg = "Network %s requires DHCP, but could not locate a rack controller to serve it." % (
@@ -757,9 +802,6 @@ class CreateNetworkTemplate(BaseMaasAction):
                                 ctx_type='network')
                             self.task.failure(focus=n.name)
 
-                    elif dhcp_on and vlan.dhcp_on:
-                        self.logger.info("DHCP already enabled for subnet %s" %
-                                         (subnet.resource_id))
                 except ValueError:
                     raise errors.DriverError("Inconsistent data from MaaS")
 
@@ -1026,21 +1068,6 @@ class IdentifyNode(BaseMaasAction):
     """Action to identify a node resource in MaaS matching a node design."""
 
     def start(self):
-        try:
-            machine_list = maas_machine.Machines(self.maas_client)
-            machine_list.refresh()
-        except Exception as ex:
-            self.logger.debug("Error accessing the MaaS API.", exc_info=ex)
-            self.task.set_status(hd_fields.TaskStatus.Complete)
-            self.task.failure()
-            self.task.add_status_msg(
-                msg='Error accessing MaaS Machines API: %s' % str(ex),
-                error=True,
-                ctx='NA',
-                ctx_type='NA')
-            self.task.save()
-            return
-
         self.task.set_status(hd_fields.TaskStatus.Running)
         self.task.save()
 
@@ -1062,37 +1089,56 @@
 
         for n in nodes:
             try:
-                machine = machine_list.identify_baremetal_node(
-                    n, domain=n.get_domain(site_design))
-                if machine is not None:
-                    self.task.success(focus=n.get_id())
+                machine = find_node_in_maas(self.maas_client, n)
+                if machine is None:
+                    self.task.failure(focus=n.get_id())
+                    self.task.add_status_msg(
+                        msg="Node %s not found in MaaS" % n.name,
+                        error=True,
+                        ctx=n.name,
+                        ctx_type='node')
+                elif type(machine) == maas_machine.Machine:
+                    machine.update_identity(n, domain=n.get_domain(site_design))
+                    msg = "Node %s identified in MaaS" % n.name
+                    self.logger.debug(msg)
                     self.task.add_status_msg(
-                        msg="Node %s identified in MaaS" % n.name,
+                        msg=msg,
                         error=False,
                         ctx=n.name,
                         ctx_type='node')
-                else:
-                    self.task.failure(focus=n.get_id())
+                    self.task.success(focus=n.get_id())
+                elif type(machine) == maas_rack.RackController:
+                    msg = "Rack controller %s identified in MaaS" % n.name
+                    self.logger.debug(msg)
                     self.task.add_status_msg(
-                        msg="Node %s not found in MaaS" % n.name,
-                        error=True,
+                        msg=msg,
+                        error=False,
                         ctx=n.name,
                         ctx_type='node')
+                    self.task.success(focus=n.get_id())
+            except ApiNotAvailable as api_ex:
+                self.logger.debug("Error accessing the MaaS API.", exc_info=api_ex)
+                self.task.failure()
+                self.task.add_status_msg(
+                    msg='Error accessing MaaS API: %s' % str(api_ex),
+                    error=True,
+                    ctx='NA',
+                    ctx_type='NA')
+                self.task.save()
             except Exception as ex:
+                self.logger.debug(
+                    "Exception caught in identify node.", exc_info=ex)
                 self.task.failure(focus=n.get_id())
                 self.task.add_status_msg(
-                    msg="Node %s not found in MaaS" % n.name,
+                    msg="Error trying to locate %s in MAAS" % n.name,
                     error=True,
                     ctx=n.name,
                     ctx_type='node')
-                self.logger.debug(
-                    "Exception caught in identify node.", exc_info=ex)
 
         self.task.set_status(hd_fields.TaskStatus.Complete)
         self.task.save()
         return
 
-
 class ConfigureHardware(BaseMaasAction):
     """Action to start commissioning a server."""
 
@@ -1136,9 +1182,15 @@ class ConfigureHardware(BaseMaasAction):
                 try:
                     self.logger.debug(
                         "Locating node %s for commissioning" % (n.name))
-                    machine = machine_list.identify_baremetal_node(
-                        n, update_name=False)
-                    if machine is not None:
+                    machine = find_node_in_maas(self.maas_client, n)
+                    if type(machine) == maas_rack.RackController:
+                        msg = "Located node %s in MaaS as rack controller. Skipping." % (
+                            n.name)
+                        self.logger.info(msg)
+                        self.task.add_status_msg(
+                            msg=msg, error=False, ctx=n.name, ctx_type='node')
+                        self.task.success(focus=n.get_id())
+                    elif machine is not None:
                         if machine.status_name in [
                                 'New', 'Broken', 'Failed commissioning',
                                 'Failed testing'
@@ -1215,7 +1267,7 @@
                             msg=msg, error=False, ctx=n.name, ctx_type='node')
                         self.task.success(focus=n.get_id())
                     else:
-                        msg = "Located node %s in MaaS, unknown status %s. Skipping..." % (
+                        msg = "Located node %s in MaaS, unknown status %s. Skipping." % (
                             n, machine.status_name)
                         self.logger.warning(msg)
                         self.task.add_status_msg(
@@ -1323,10 +1375,20 @@ class ApplyNodeNetworking(BaseMaasAction):
                 self.logger.debug(
                     "Locating node %s for network configuration" % (n.name))
 
-                machine = machine_list.identify_baremetal_node(
-                    n, update_name=False)
+                machine = find_node_in_maas(self.maas_client, n)
 
-                if machine is not None:
+                if type(machine) is maas_rack.RackController:
+                    msg = ("Node %s is a rack controller, skipping deploy action." %
+                           n.name)
+                    self.logger.debug(msg)
+                    self.task.add_status_msg(
+                        msg=msg,
+                        error=False,
+                        ctx=n.name,
+                        ctx_type='node')
+                    self.task.success(focus=n.name)
+                    continue
+                elif machine is not None:
                     if machine.status_name.startswith('Failed Dep'):
                         msg = (
                             "Node %s has failed deployment, releasing to try again."
@@ -1677,8 +1739,7 @@ class ApplyNodePlatform(BaseMaasAction):
                 self.logger.debug(
                     "Locating node %s for platform configuration" % (n.name))
 
-                machine = machine_list.identify_baremetal_node(
-                    n, update_name=False)
+                machine = find_node_in_maas(self.maas_client, n)
 
                 if machine is None:
                     msg = "Could not locate machine for node %s" % n.name
@@ -1695,7 +1756,14 @@
                         msg=msg, error=True, ctx=n.name, ctx_type='node')
                     continue
 
-                if machine.status_name == 'Deployed':
+                if type(machine) is maas_rack.RackController:
+                    msg = ("Skipping changes to rack controller %s." % n.name)
+                    self.logger.info(msg)
+                    self.task.add_status_msg(
+                        msg=msg, error=False, ctx=n.name, ctx_type='node')
+                    self.task.success(focus=n.name)
+                    continue
+                elif machine.status_name == 'Deployed':
                     msg = (
                         "Located node %s in MaaS, status deployed. Skipping "
                         "and considering success. Destroy node first if redeploy needed."
@@ -1860,8 +1928,7 @@ class ApplyNodeStorage(BaseMaasAction):
                 self.logger.debug(
                     "Locating node %s for storage configuration" % (n.name))
 
-                machine = machine_list.identify_baremetal_node(
-                    n, update_name=False)
+                machine = find_node_in_maas(self.maas_client, n)
 
                 if machine is None:
                     msg = "Could not locate machine for node %s" % n.name
@@ -1878,7 +1945,15 @@
                     self.task.failure(focus=n.get_id())
                     continue
 
-                if machine.status_name == 'Deployed':
+                if type(machine) is maas_rack.RackController:
+                    msg = ("Skipping configuration updates to rack controller %s." %
+                           n.name)
+                    self.logger.info(msg)
+                    self.task.add_status_msg(
+                        msg=msg, error=False, ctx=n.name, ctx_type='node')
+                    self.task.success(focus=n.name)
+                    continue
+                elif machine.status_name == 'Deployed':
                     msg = (
                         "Located node %s in MaaS, status deployed. Skipping "
                         "and considering success. Destroy node first if redeploy needed."
@@ -2203,9 +2278,16 @@ class DeployNode(BaseMaasAction):
 
             for n in nodes:
                 try:
-                    machine = machine_list.identify_baremetal_node(
-                        n, update_name=False)
-                    if machine.status_name.startswith(
+                    machine = find_node_in_maas(self.maas_client, n)
+
+                    if type(machine) is maas_rack.RackController:
+                        msg = "Skipping configuration of rack controller %s." % n.name
+                        self.logger.info(msg)
+                        self.task.add_status_msg(
+                            msg=msg, error=False, ctx=n.name, ctx_type='node')
+                        self.task.success(focus=n.name)
+                        continue
+                    elif machine.status_name.startswith(
                             'Deployed') or machine.status_name.startswith(
                                 'Deploying'):
                         msg = "Node %s already deployed or deploying, skipping." % (
@@ -2359,3 +2441,26 @@ class DeployNode(BaseMaasAction):
         self.task.save()
 
         return
+
+def find_node_in_maas(maas_client, node_model):
+    """Find a node in MAAS matching the node_model.
+
+    Note that the returned Machine may be a simple Machine or
+    a RackController.
+
+    :param maas_client: instance of an active session to MAAS
+    :param node_model: instance of objects.Node to match
+    :returns: instance of maasdriver.models.Machine
+    """
+
+    machine_list = maas_machine.Machines(maas_client)
+    machine_list.refresh()
+    machine = machine_list.identify_baremetal_node(node_model)
+
+    if not machine:
+        # If the node isn't found as a normal node, check rack controllers
+        rackd_list = maas_rack.RackControllers(maas_client)
+        rackd_list.refresh()
+        machine = rackd_list.identify_baremetal_node(node_model)
+
+    return machine
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/api_client.py b/python/drydock_provisioner/drivers/node/maasdriver/api_client.py
index 4442e1a..949c790 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/api_client.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/api_client.py
@@ -128,8 +128,9 @@ class MaasRequestFactory(object):
 
         for (k, v) in files.items():
             if v is None:
-                continue
-            elif isinstance(v, list):
+                v = ""
+
+            if isinstance(v, list):
                 for i in v:
                     value = base64.b64encode(
                         str(i).encode('utf-8')).decode('utf-8')
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/errors.py b/python/drydock_provisioner/drivers/node/maasdriver/errors.py
new file mode 100644
index 0000000..25237ef
--- /dev/null
+++ b/python/drydock_provisioner/drivers/node/maasdriver/errors.py
@@ -0,0 +1,26 @@
+# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Errors and exceptions specific to MAAS node driver."""
+import drydock_provisioner.error as errors
+
+
+class RackControllerConflict(errors.DriverError):
+    """Exception for settings that are not allowed because not enough
+    or too many rack controllers are attached to a network."""
+    pass
+
+
+class ApiNotAvailable(errors.DriverError):
+    """Exception when trying to utilize the MAAS API and the connection
+    fails."""
+    pass
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/base.py b/python/drydock_provisioner/drivers/node/maasdriver/models/base.py
index 70ce79a..883472b 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/models/base.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/models/base.py
@@ -37,6 +37,8 @@ class ResourceBase(object):
         for f in self.fields:
             if f in kwargs.keys():
                 setattr(self, f, kwargs.get(f))
+            else:
+                setattr(self, f, None)
 
     """
     Update resource attributes from MaaS
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/interface.py b/python/drydock_provisioner/drivers/node/maasdriver/models/interface.py
index 9c455af..655dab8 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/models/interface.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/models/interface.py
@@ -235,6 +235,18 @@ class Interface(model_base.ResourceBase):
 
         return False
 
+    def responds_to_mac(self, mac_address):
+        """Check if this interface will respond to a MAC address.
+
+        :param str mac_address: the MAC address to check
+
+        :return: true if this interface will respond to this MAC
+        """
+        if mac_address.replace(':', '').upper() == self.mac_address.replace(':', '').upper():
+            return True
+
+        return False
+
     def set_mtu(self, new_mtu):
         """Set interface MTU.
 
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py b/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py
index 4add289..2abb41d 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py
@@ -77,6 +77,18 @@ class Machine(model_base.ResourceBase):
                 return i
         return None
 
+    def interface_for_mac(self, mac_address):
+        """Find the machine interface that owns the specified ``mac_address``.
+
+        :param str mac_address: The MAC address
+
+        :return: the interface that responds to this MAC or None
+        """
+        for i in self.interfaces:
+            if i.responds_to_mac(mac_address):
+                return i
+        return None
+
     def get_power_params(self):
         """Load power parameters for this node from MaaS."""
         url = self.interpolate_url()
@@ -426,6 +438,30 @@ class Machine(model_base.ResourceBase):
                 "Failed updating power parameters MAAS url {} - return code {}\n{}"
                 .format(url, resp.status_code.resp.text))
 
+    def update_identity(self, n, domain="local"):
+        """Update this node's identity based on the Node object ``n``
+
+        :param objects.Node n: The Node object to use as reference
+        :param str domain: The DNS domain to register this node under
+        """
+        try:
+            self.hostname = n.name
+            self.domain = domain
+            self.update()
+            if n.oob_type == 'libvirt':
+                self.logger.debug(
+                    "Updating node %s MaaS power parameters for libvirt." %
+                    (n.name))
+                oob_params = n.oob_parameters
+                self.set_power_parameters(
+                    'virsh',
+                    power_address=oob_params.get('libvirt_uri'),
+                    power_id=n.name)
+            self.logger.debug("Updated MaaS resource %s hostname to %s" %
+                              (self.resource_id, n.name))
+        except Exception as ex:
+            self.logger.debug("Error updating MAAS node: %s" % str(ex))
+
     def to_dict(self):
         """Serialize this resource instance into a dict.
 
@@ -522,9 +558,7 @@ class Machines(model_base.ResourceCollectionBase):
         return node
 
     def identify_baremetal_node(self,
-                                node_model,
-                                update_name=True,
-                                domain="local"):
+                                node_model):
         """Find MaaS node resource matching Drydock BaremetalNode.
 
         Search all the defined MaaS Machines and attempt to match
@@ -532,7 +566,6 @@
         the MaaS instance with the correct hostname
 
         :param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource
-        :param update_name: Whether Drydock should update the MaaS resource name to match the Drydock design
         """
         maas_node = None
 
@@ -552,46 +585,37 @@ class Machines(model_base.ResourceCollectionBase):
                     node_oob_ip
                 })
             except ValueError:
-                self.logger.warn(
+                self.logger.info(
                     "Error locating matching MaaS resource for OOB IP %s" %
                     (node_oob_ip))
                 return None
         else:
             # Use boot_mac for node's not using IPMI
-            node_boot_mac = node_model.boot_mac
+            nodes = self.find_nodes_with_mac(node_model.boot_mac)
 
-            if node_boot_mac is not None:
-                maas_node = self.singleton({'boot_mac': node_model.boot_mac})
+            if len(nodes) == 1:
+                maas_node = nodes[0]
+            else:
+                self.logger.debug("Error: Found %d nodes with MAC %s", len(nodes), node_model.boot_mac)
+                maas_node = None
 
         if maas_node is None:
             self.logger.info(
                 "Could not locate node %s in MaaS" % node_model.name)
-            return None
-
-        self.logger.debug("Found MaaS resource %s matching Node %s" %
-                          (maas_node.resource_id, node_model.get_id()))
-
-        if maas_node.hostname != node_model.name and update_name:
-            try:
-                maas_node.hostname = node_model.name
-                maas_node.domain = domain
-                maas_node.update()
-                if node_model.oob_type == 'libvirt':
-                    self.logger.debug(
-                        "Updating node %s MaaS power parameters for libvirt." %
-                        (node_model.name))
-                    oob_params = node_model.oob_parameters
-                    maas_node.set_power_parameters(
-                        'virsh',
-                        power_address=oob_params.get('libvirt_uri'),
-                        power_id=node_model.name)
-                self.logger.debug("Updated MaaS resource %s hostname to %s" %
-                                  (maas_node.resource_id, node_model.name))
-            except Exception as ex:
-                self.logger.debug("Error updating MAAS node: %s" % str(ex))
+        else:
+            self.logger.debug("Found MaaS resource %s matching Node %s" %
+                              (maas_node.resource_id, node_model.get_id()))
 
         return maas_node
 
+    def find_nodes_with_mac(self, mac_address):
+        """Find a list of nodes that own a NIC with ``mac_address``"""
+        node_list = []
+        for n in self.resources.values():
+            if n.interface_for_mac(mac_address):
+                node_list.append(n)
+        return node_list
+
     def query(self, query):
         """Custom query method to deal with complex fields."""
         result = list(self.resources.values())
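identify_baremetal_node() now drops the update_name/domain handling and, when falling back to the boot MAC, uses the new find_nodes_with_mac() helper: anything other than exactly one owning node is treated as not found. A condensed sketch of that fallback, where maas_client and boot_mac are placeholders and the hunk above is authoritative:

    # Illustrative only; condensed from Machines.identify_baremetal_node above.
    import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine

    machines = maas_machine.Machines(maas_client)         # maas_client: assumed active MAAS session
    machines.refresh()

    candidates = machines.find_nodes_with_mac(boot_mac)   # boot_mac: the design's boot MAC
    if len(candidates) == 1:
        maas_node = candidates[0]   # exactly one NIC owns the MAC: unambiguous match
    else:
        maas_node = None            # zero or several owners are treated as not found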
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py b/python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py
index 00a8c8d..71ce8bc 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/models/rack_controller.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Model for MaaS rack-controller API resource."""
 
-import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
+import drydock_provisioner.error as errors
 import drydock_provisioner.drivers.node.maasdriver.models.machine as maas_machine
 
 
@@ -64,8 +64,25 @@ class RackController(maas_machine.Machine):
 
         return svc_status
 
+    def update_identity(self, n, domain="local"):
+        """Cannot update rack controller identity."""
+        self.logger.debug("Cannot update rack controller identity for %s, no-op." %
+                          self.hostname)
+        return
 
-class RackControllers(model_base.ResourceCollectionBase):
+    def is_healthy(self):
+        """Check if this rack controller appears healthy based on service status."""
+        rack_svc = self.get_services()
+        healthy = True
+        for s in rack_svc:
+            if s in RackController.REQUIRED_SERVICES:
+                # TODO(sh8121att) for dhcpd, ensure it is running if this rack controller
+                # is a primary or secondary for a VLAN
+                if rack_svc[s] not in ("running", "off"):
+                    healthy = False
+        return healthy
+
+class RackControllers(maas_machine.Machines):
     """Model for a collection of rack controllers."""
 
     collection_url = 'rackcontrollers/'
@@ -73,3 +90,7 @@ class RackControllers(model_base.ResourceCollectionBase):
 
     def __init__(self, api_client, **kwargs):
         super().__init__(api_client)
+
+    def acquire_node(self, node_name):
+        """Acquire not valid for nodes that are Rack Controllers."""
+        raise errors.DriverError("Rack controllers cannot be acquired.")
diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py b/python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
index f403298..87219f7 100644
--- a/python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
+++ b/python/drydock_provisioner/drivers/node/maasdriver/models/vlan.py
@@ -14,6 +14,7 @@
 """Models representing MaaS VLAN resources."""
 
 import drydock_provisioner.drivers.node.maasdriver.models.base as model_base
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
 
 
 class Vlan(model_base.ResourceBase):
@@ -65,6 +66,41 @@ class Vlan(model_base.ResourceBase):
         else:
             self.vid = int(new_vid)
 
+    def add_rack_controller(self, rack_id):
+        """Add a rack controller that manages DHCP on this VLAN.
+
+        Whichever of primary_rack or secondary_rack, in that order,
+        is not set - set to ``rack_id``. If both are already set
+        raise RackControllerConflict exception.
+        """
+        if not self.primary_rack or self.primary_rack == rack_id:
+            self.logger.debug("Setting primary DHCP controller %s on VLAN %s", rack_id, self.resource_id)
+            self.primary_rack = rack_id
+        elif not self.secondary_rack or self.secondary_rack == rack_id:
+            self.logger.debug("Setting secondary DHCP controller %s on VLAN %s.", rack_id, self.resource_id)
+            self.secondary_rack = rack_id
+        else:
+            raise RackControllerConflict(
+                "Both primary and secondary rack controllers already set.")
+
+    def reset_dhcp_mgmt(self, commit=False):
+        """Reset the DHCP control for this VLAN.
+
+        Reset the settings in the model impacting DHCP control on this
+        VLAN. Only commit these changes to the MAAS API if ``commit`` is
+        True.
+
+        :param bool commit: Whether to commit reset to MAAS API
+        """
+        self.logger.debug("Resetting DHCP control on VLAN %s.", self.resource_id)
+        self.relay_vlan = None
+        self.dhcp_on = False
+        self.primary_rack = None
+        self.secondary_rack = None
+
+        if commit:
+            self.update()
+
     def set_dhcp_relay(self, relay_vlan_id):
         self.relay_vlan = relay_vlan_id
         self.update()
diff --git a/python/drydock_provisioner/objects/node.py b/python/drydock_provisioner/objects/node.py
index 6e38f97..dc8cae0 100644
--- a/python/drydock_provisioner/objects/node.py
+++ b/python/drydock_provisioner/objects/node.py
@@ -53,12 +53,10 @@ class BaremetalNode(drydock_provisioner.objects.hostprofile.HostProfile):
                                site_design,
                                state_manager,
                                resolve_aliases=False):
-        self.logger.debug("Applying host profile to node %s" % self.name)
+        self.logger.debug("Compiling effective node model for %s" % self.name)
         self.apply_host_profile(site_design)
-        self.logger.debug("Applying hardware profile to node %s" % self.name)
         self.apply_hardware_profile(site_design)
         self.source = hd_fields.ModelSource.Compiled
-        self.logger.debug("Resolving kernel parameters on node %s" % self.name)
         self.resolve_kernel_params(site_design)
         if resolve_aliases:
             self.logger.debug(
diff --git a/python/tests/unit/test_maasdriver_vlan.py b/python/tests/unit/test_maasdriver_vlan.py
new file mode 100644
index 0000000..6094efe
--- /dev/null
+++ b/python/tests/unit/test_maasdriver_vlan.py
@@ -0,0 +1,49 @@
+# Copyright 2018 AT&T Intellectual Property. All other rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+'''Tests for the maasdriver vlan model.'''
+import pytest
+
+from drydock_provisioner.drivers.node.maasdriver.models.vlan import Vlan
+from drydock_provisioner.drivers.node.maasdriver.errors import RackControllerConflict
+
+
+class TestMaasVlan():
+    def test_add_rack_controller(self, mocker):
+        '''Test vlan model method for setting a managing rack controller.'''
+
+        # An object to return that looks like a requests response
+        # object wrapping a MAAS API response
+        class MockedResponse():
+
+            status_code = 200
+
+        vlan_fields = {'name': 'test', 'dhcp_on': True, 'mtu': 1500}
+
+        primary_rack = "asdf79"
+        secondary_rack = "asdf80"
+        tertiary_rack = "asdf81"
+
+        api_client = mocker.MagicMock()
+        api_client.get.return_value = MockedResponse()
+
+        vlan_obj = Vlan(api_client, **vlan_fields)
+
+        vlan_obj.add_rack_controller(primary_rack)
+        assert vlan_obj.primary_rack == primary_rack
+
+        vlan_obj.add_rack_controller(secondary_rack)
+        assert vlan_obj.secondary_rack == secondary_rack
+
+        with pytest.raises(RackControllerConflict):
+            vlan_obj.add_rack_controller(tertiary_rack)
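The mocker fixture used here comes from pytest-mock, so that plugin is required to run the test. reset_dhcp_mgmt(), added in the same change, can be exercised with the same mocked-client pattern; a possible companion test, not part of this commit:

    # Sketch of a companion test for Vlan.reset_dhcp_mgmt(); assumes the same
    # imports and pytest-mock fixture as the test above. Not part of this commit.
    def test_reset_dhcp_mgmt(mocker):
        vlan_obj = Vlan(mocker.MagicMock(), name='test', dhcp_on=True, mtu=1500)

        vlan_obj.add_rack_controller("asdf79")
        vlan_obj.reset_dhcp_mgmt()   # commit=False by default, so no MAAS API call is made

        assert vlan_obj.dhcp_on is False
        assert vlan_obj.primary_rack is None
        assert vlan_obj.secondary_rack is None
        assert vlan_obj.relay_vlan is None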