From 292e94ee2caed743930c6e84b2cc7f57dbf4e418 Mon Sep 17 00:00:00 2001 From: Phil Sphicas Date: Sat, 17 Jul 2021 21:02:51 +0000 Subject: [PATCH] Avoid expensive MAAS API calls The MAAS API call "GET /MAAS/api/2.0/machines/" retrieves information about every machine known to MAAS, which is very slow. The API supports filtering based on hostname and mac_address (among others), and querying for power parameters for all nodes at once. This change modifies identify_baremetal_node to avoid calling refresh on the full machine list. Also, the refresh method of ResourceCollectionBase is updated to allow passing of params, which can be used to take advantage of the filtering. Note that a filtered call to refresh overwrites the resources collection to only contain the returned values. Most calls to Machines.refresh() aren't really needed at all - they are replaced with a call to Machines.empty_refresh(), which will still make sure that the API endpoint is accessible but return an empty collection. (This may get removed entirely in the future.) Change-Id: Ie58c45e1790c5c827d9d47f5582214ca519946de --- .../drivers/node/maasdriver/actions/node.py | 36 ++-- .../drivers/node/maasdriver/api_client.py | 2 +- .../drivers/node/maasdriver/models/base.py | 4 +- .../drivers/node/maasdriver/models/machine.py | 165 ++++++++++++++---- 4 files changed, 153 insertions(+), 54 deletions(-) diff --git a/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py b/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py index 67020b9d..6814b692 100644 --- a/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py +++ b/python/drydock_provisioner/drivers/node/maasdriver/actions/node.py @@ -249,8 +249,7 @@ class DestroyNode(BaseMaasAction): :return: None """ try: - machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + maas_machine.Machines(self.maas_client).empty_refresh() except Exception as ex: self.logger.warning("Error accessing the MaaS API.", exc_info=ex) self.task.set_status(hd_fields.TaskStatus.Complete) @@ -1092,7 +1091,7 @@ class IdentifyNode(BaseMaasAction): for n in nodes: try: - machine = find_node_in_maas(self.maas_client, n) + machine = find_node_in_maas(self.maas_client, n, probably_exists=False) if machine is None: self.task.failure(focus=n.get_id()) self.task.add_status_msg( @@ -1147,8 +1146,7 @@ class ConfigureHardware(BaseMaasAction): def start(self): try: - machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + maas_machine.Machines(self.maas_client).empty_refresh() except Exception as ex: self.logger.debug("Error accessing the MaaS API.", exc_info=ex) self.task.set_status(hd_fields.TaskStatus.Complete) @@ -1334,8 +1332,7 @@ class ApplyNodeNetworking(BaseMaasAction): def start(self): try: - machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + maas_machine.Machines(self.maas_client).empty_refresh() fabrics = maas_fabric.Fabrics(self.maas_client) fabrics.refresh() @@ -1702,8 +1699,7 @@ class ApplyNodePlatform(BaseMaasAction): def start(self): try: - machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + maas_machine.Machines(self.maas_client).empty_refresh() tag_list = maas_tag.Tags(self.maas_client) tag_list.refresh() @@ -1890,8 +1886,7 @@ class ApplyNodeStorage(BaseMaasAction): def start(self): try: - machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + maas_machine.Machines(self.maas_client).empty_refresh() except Exception as ex: self.logger.debug("Error accessing the MaaS API.", exc_info=ex) self.task.set_status(hd_fields.TaskStatus.Complete) @@ -2259,7 +2254,7 @@ class DeployNode(BaseMaasAction): def start(self): try: machine_list = maas_machine.Machines(self.maas_client) - machine_list.refresh() + machine_list.empty_refresh() except Exception as ex: self.logger.debug("Error accessing the MaaS API.", exc_info=ex) self.task.set_status(hd_fields.TaskStatus.Complete) @@ -2462,25 +2457,28 @@ class DeployNode(BaseMaasAction): return -def find_node_in_maas(maas_client, node_model): +def find_node_in_maas(maas_client, node_model, probably_exists=True): """Find a node in MAAS matching the node_model. - Note that the returned Machine may be a simple Machine or - a RackController. + Note that the returned Machine may be a simple Machine or a RackController. + + The ``probably_exists`` parameter provides a hint that can reduce the + number of MAAS API calls generated, but does not affect whether or not the + machine will ultimately be found. :param maas_client: instance of an active session to MAAS :param node_model: instance of objects.Node to match + :param probably_exists: whether the machine is likely to exist in MAAS with + the correct hostname :returns: instance of maasdriver.models.Machine """ machine_list = maas_machine.Machines(maas_client) - machine_list.refresh() - machine = machine_list.identify_baremetal_node(node_model) + machine = machine_list.identify_baremetal_node(node_model, probably_exists) if not machine: # If node isn't found a normal node, check rack controllers rackd_list = maas_rack.RackControllers(maas_client) - rackd_list.refresh() - machine = rackd_list.identify_baremetal_node(node_model) + machine = rackd_list.identify_baremetal_node(node_model, probably_exists) return machine diff --git a/python/drydock_provisioner/drivers/node/maasdriver/api_client.py b/python/drydock_provisioner/drivers/node/maasdriver/api_client.py index cdc5a190..351b85d6 100644 --- a/python/drydock_provisioner/drivers/node/maasdriver/api_client.py +++ b/python/drydock_provisioner/drivers/node/maasdriver/api_client.py @@ -160,7 +160,7 @@ class MaasRequestFactory(object): part_headers))) kwargs['files'] = files_tuples - params = kwargs.get('params', None) + params = kwargs.pop('params', None) if params is None and 'op' in kwargs.keys(): params = {'op': kwargs.pop('op')} diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/base.py b/python/drydock_provisioner/drivers/node/maasdriver/models/base.py index 21cfcc5f..3cd378ef 100644 --- a/python/drydock_provisioner/drivers/node/maasdriver/models/base.py +++ b/python/drydock_provisioner/drivers/node/maasdriver/models/base.py @@ -233,9 +233,9 @@ class ResourceCollectionBase(object): Initialize or refresh the collection list from MaaS """ - def refresh(self): + def refresh(self, **kwargs): url = self.interpolate_url() - resp = self.api_client.get(url) + resp = self.api_client.get(url, **kwargs) if resp.status_code == 200: self.resource = {} diff --git a/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py b/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py index 1e1a2884..1bac0269 100644 --- a/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py +++ b/python/drydock_provisioner/drivers/node/maasdriver/models/machine.py @@ -532,7 +532,7 @@ class Machines(model_base.ResourceCollectionBase): :param node_name: The hostname of a node to acquire """ - self.refresh() + self.refresh(params={'hostname': node_name}) node = self.singleton({'hostname': node_name}) @@ -563,17 +563,36 @@ class Machines(model_base.ResourceCollectionBase): return node def identify_baremetal_node(self, - node_model): + node_model, + probably_exists=True): """Find MaaS node resource matching Drydock BaremetalNode. - Search all the defined MaaS Machines and attempt to match - one against the provided Drydock BaremetalNode model. Update - the MaaS instance with the correct hostname + Performs one or more queries to the MaaS API to find a Machine matching + the provided Drydock BaremetalNode model, in the following order, and + returns the first match found: - :param node_model: Instance of objects.node.BaremetalNode to search MaaS for matching resource + 1. If ``probably_exists`` is True, queries by hostname: + GET /MAAS/api/2.0/machines/?hostname={hostname} + 2a. For ipmi or redfish, looks for a matching BMC address: + GET /MAAS/api/2.0/machines/?op=power_parameters + and if a matching system_id is found: + GET /MAAS/api/2.0/machines/{system_id}/ + 2b. For virsh, queries by mac address: + GET /MAAS/api/2.0/machines/?mac_address={mac_address} + + :param node_model: Instance of objects.node.BaremetalNode to search + MaaS for matching resource + :param probably_exists: whether the machine is likely to exist in MAAS + with the correct hostname + :returns: instance of maasdriver.models.Machine """ maas_node = None + if probably_exists: + maas_node = self.find_node_with_hostname(node_model.name) + if maas_node: + return maas_node + if node_model.oob_type == 'ipmi' or node_model.oob_type == 'redfish': node_oob_network = node_model.oob_parameters['network'] node_oob_ip = node_model.get_network_address(node_oob_network) @@ -582,27 +601,10 @@ class Machines(model_base.ResourceCollectionBase): self.logger.warn("Node model missing OOB IP address") raise ValueError('Node model missing OOB IP address') - try: - self.collect_power_params() - - maas_node = self.singleton({ - 'power_params.power_address': - node_oob_ip - }) - except ValueError: - self.logger.info( - "Error locating matching MaaS resource for OOB IP %s" % - (node_oob_ip)) - return None + maas_node = self.find_node_with_power_address(node_oob_ip) else: # Use boot_mac for node's not using IPMI - nodes = self.find_nodes_with_mac(node_model.boot_mac) - - if len(nodes) == 1: - maas_node = nodes[0] - else: - self.logger.debug("Error: Found %d nodes with MAC %s", len(nodes), node_model.boot_mac) - maas_node = None + maas_node = self.find_node_with_mac(node_model.boot_mac) if maas_node is None: self.logger.info( @@ -613,13 +615,105 @@ class Machines(model_base.ResourceCollectionBase): return maas_node - def find_nodes_with_mac(self, mac_address): - """Find a list of nodes that own a NIC with ``mac_address``""" - node_list = [] - for n in self.resources.values(): - if n.interface_for_mac(mac_address): - node_list.append(n) - return node_list + def find_node_with_hostname(self, hostname): + """Find the first maching node with hostname ``hostname``""" + url = self.interpolate_url() + # query the MaaS API for machines with a matching mac address. + # this call returns a json list, each member representing a complete + # Machine + self.logger.debug( + "Finding {} with hostname: {}".format( + self.collection_resource.__name__, hostname + ) + ) + + resp = self.api_client.get(url, params={"hostname": hostname}) + + if resp.status_code == 200: + json_list = resp.json() + + for node in json_list: + # construct a Machine from the first API result and return it + self.logger.debug( + "Finding {} with hostname: {}: Found: {}: {}".format( + self.collection_resource.__name__, + hostname, + node.get("system_id"), + node.get("hostname"), + ) + ) + return self.collection_resource.from_dict(self.api_client, node) + + return None + + def find_node_with_power_address(self, power_address): + """Find the first matching node that has a BMC with IP ``power_address``""" + url = self.interpolate_url() + # query the MaaS API for all power parameters at once. + # this call returns a json dict, mapping system id to power parameters + + self.logger.debug( + "Finding {} with power address: {}".format( + self.collection_resource.__name__, power_address + ) + ) + + resp = self.api_client.get(url, op="power_parameters") + + if resp.status_code == 200: + json_dict = resp.json() + + for system_id, power_params in json_dict.items(): + self.logger.debug( + "Finding {} with power address: {}: Considering: {}: {}".format( + self.collection_resource.__name__, + power_address, + system_id, + power_params.get("power_address"), + ) + ) + if power_params.get("power_address") == power_address: + self.logger.debug( + "Finding {} with power address: {}: Found: {}: {}".format( + self.collection_resource.__name__, + power_address, + system_id, + power_params.get("power_address"), + ) + ) + + # the API result isn't quite enough to contruct a Machine, + # so construct one with the system_id and then refresh + res = self.collection_resource( + self.api_client, + resource_id=system_id, + power_parameters=power_params, + ) + res.refresh() + return res + + return None + + def find_node_with_mac(self, mac_address): + """Find the first maching node that own a NIC with ``mac_address``""" + url = self.interpolate_url() + # query the MaaS API for machines with a matching mac address. + # this call returns a json list, each member representing a complete + # Machine + resp = self.api_client.get(url, params={'mac_address': mac_address}) + + if resp.status_code == 200: + json_list = resp.json() + + # if len(json_list) > 1: + # # XXX: is this check worth it? maybe we ignore + # raise ValueError('Multiple machines found with mac_address: {}'.format(mac_address)) + + for o in json_list: + # construct a Machine from the first API result and return it + return self.collection_resource.from_dict(self.api_client, o) + + return None def query(self, query): """Custom query method to deal with complex fields.""" @@ -658,3 +752,10 @@ class Machines(model_base.ResourceCollectionBase): raise errors.DriverError("Failed updating MAAS url %s - return code %s" % (url, resp.status_code)) + + def empty_refresh(self): + """Check connectivity to MAAS machines API + + Sends a valid query that should return an empty list of machines + """ + self.refresh(params={'mac_address': '00:00:00:00:00:00'})