Commit 78c4d4c

WIP: Per node live-migration
Change-Id: I1f118e639e69d0556fbfb0980ff0340d7525a003
1 parent 401e8c4 commit 78c4d4c

9 files changed (+71, -39 lines)

doc/notification_samples/common_payloads/RequestSpecPayload.json

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
     "availability_zone": null,
     "flavor": {"$ref": "FlavorPayload.json#"},
     "ignore_hosts": null,
+    "ignore_nodes": null,
     "image": {"$ref": "ImageMetaPayload.json#"},
     "instance_uuid": "d5e6a7b7-80e5-4166-85a3-cd6115201082",
     "num_instances": 1,

nova/compute/api.py

Lines changed: 8 additions & 9 deletions
@@ -5475,15 +5475,13 @@ def evacuate(self, context, instance, host, on_shared_storage,
         # the pre-v2.29 API microversion, which wouldn't set force
         if force is False and host:
             nodes = objects.ComputeNodeList.get_all_by_host(context, host)
-            # NOTE(sbauza): Unset the host to make sure we call the scheduler
-            host = None
-            # FIXME(sbauza): Since only Ironic driver uses more than one
-            # compute per service but doesn't support evacuations,
-            # let's provide the first one.
-            target = nodes[0]
+            if len(nodes) == 1:
+                node = nodes[0].hypervisor_hostname
+            else:
+                node = None
             destination = objects.Destination(
-                host=target.host,
-                node=target.hypervisor_hostname
+                host=host,
+                node=node
             )
             request_spec.requested_destination = destination

@@ -5497,7 +5495,8 @@ def evacuate(self, context, instance, host, on_shared_storage,
             bdms=None,
             recreate=True,
             on_shared_storage=on_shared_storage,
-            host=host,
+            # NOTE(sbauza): To make sure we call the scheduler
+            host=None,
             request_spec=request_spec,
             )
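
The change above keeps the user-requested host on the Destination and pins a node only when that host is backed by exactly one compute node. A minimal standalone sketch of that rule follows; Destination and the node records here are stand-ins for the Nova objects, not the real classes.

    # Stand-in types: only the decision rule is taken from the diff above.
    from dataclasses import dataclass
    from typing import Optional


    @dataclass
    class Destination:
        host: str
        node: Optional[str]


    def build_destination(host, nodes):
        """Keep the requested host; pin the node only when it is unambiguous."""
        if len(nodes) == 1:
            node = nodes[0]["hypervisor_hostname"]
        else:
            # several nodes behind one host: let the scheduler pick the node
            node = None
        return Destination(host=host, node=node)


    print(build_destination("comp-1", [{"hypervisor_hostname": "domain-c100"}]))
    # Destination(host='comp-1', node='domain-c100')
    print(build_destination("comp-2", [{"hypervisor_hostname": "node-a"},
                                       {"hypervisor_hostname": "node-b"}]))
    # Destination(host='comp-2', node=None)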

nova/compute/manager.py

Lines changed: 5 additions & 3 deletions
@@ -8002,8 +8002,10 @@ def _detach_interface(self, context, instance, port_id):

     def _get_compute_info(self, host, nodename=None):
         if not nodename:
-            return objects.ComputeNode.get_first_node_by_host_for_old_compat(
-                self.context, host)
+            nodes = objects.ComputeNodeList.get_all_by_host(self.context, host)
+            if len(nodes) != 1:
+                raise exception.ComputeHostNotFound(host=host)
+            return nodes[0]

         return objects.ComputeNode.get_by_host_and_nodename(
             self.context, host, nodename)

@@ -8069,7 +8071,7 @@ def check_can_live_migrate_destination(self, ctxt, instance,
         src_compute_info = obj_base.obj_to_primitive(
             self._get_compute_info(ctxt, instance.host, instance.node))
         dst_compute_info = obj_base.obj_to_primitive(
-            self._get_compute_info(ctxt, self.host))
+            self._get_compute_info(ctxt, self.host, migration.dest_node))
         dest_check_data = self.driver.check_can_live_migrate_destination(ctxt,
             instance, src_compute_info, dst_compute_info,
             block_migration, disk_over_commit)
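
With the new lookup, calling _get_compute_info() without a node name on a host that is backed by more than one compute node raises instead of silently returning the first node. A standalone sketch of that behaviour, using plain dicts and an illustrative exception class rather than the Nova objects:

    class ComputeHostNotFound(Exception):
        """Stand-in for the Nova exception of the same name."""


    def get_compute_info(nodes_by_host, host, nodename=None):
        nodes = nodes_by_host.get(host, [])
        if not nodename:
            # ambiguous: refuse to guess which node was meant
            if len(nodes) != 1:
                raise ComputeHostNotFound(host)
            return nodes[0]
        return next(n for n in nodes if n["hypervisor_hostname"] == nodename)


    nodes_by_host = {"comp-1": [{"hypervisor_hostname": "node-a"},
                                {"hypervisor_hostname": "node-b"}]}
    print(get_compute_info(nodes_by_host, "comp-1", "node-b"))  # explicit node: ok
    try:
        get_compute_info(nodes_by_host, "comp-1")               # no node given: error
    except ComputeHostNotFound as exc:
        print("ambiguous host:", exc)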

nova/conductor/tasks/live_migrate.py

Lines changed: 23 additions & 21 deletions
@@ -99,7 +99,8 @@ def _execute(self):
             # wants the scheduler to pick a destination host, or a host was
             # specified but is not forcing it, so they want the scheduler
             # filters to run on the specified host, like a scheduler hint.
-            self.destination, self.dest_node, self.limits = self._find_destination()
+            self.destination, self.dest_node, self.limits = \
+                self._find_destination()
         else:
             # This is the case that the user specified the 'force' flag when
             # live migrating with a specific destination host so the scheduler

@@ -110,7 +111,7 @@ def _execute(self):
             self._check_destination_has_enough_memory()
             source_node, dest_node = (
                 self._check_compatible_with_source_hypervisor(
-                    self.destination))
+                    self.destination, self.dest_node))
             # TODO(mriedem): Call select_destinations() with a
             # skip_filters=True flag so the scheduler does the work of claiming
             # resources on the destination in Placement but still bypass the

@@ -317,7 +318,7 @@ def _check_destination_is_not_source(self):
                 instance_id=self.instance.uuid, host=self.destination)

     def _check_destination_has_enough_memory(self):
-        compute = self._get_compute_info(self.destination)
+        compute = self._get_compute_info(self.destination, self.dest_node)
         free_ram_mb = compute.free_ram_mb
         total_ram_mb = compute.memory_mb
         mem_inst = self.instance.memory_mb

@@ -340,17 +341,19 @@ def _check_destination_has_enough_memory(self):

     def _get_compute_info(self, host, nodename=None):
         if not nodename:
-            return objects.ComputeNode.get_first_node_by_host_for_old_compat(
-                self.context, host)
+            nodes = objects.ComputeNodeList.get_all_by_host(self.context, host)
+            if len(nodes) != 1:
+                raise exception.ComputeHostNotFound(host=host)
+            return nodes[0]

         return objects.ComputeNode.get_by_host_and_nodename(
             self.context, host, nodename)

-    def _check_compatible_with_source_hypervisor(self, destination):
+    def _check_compatible_with_source_hypervisor(self, dest_host, dest_node):
         migration = self.migration
         source_info = self._get_compute_info(migration.source_compute,
                                              migration.source_node)
-        destination_info = self._get_compute_info(destination)
+        destination_info = self._get_compute_info(dest_host, dest_node)

         source_type = source_info.hypervisor_type
         destination_type = destination_info.hypervisor_type

@@ -469,14 +472,12 @@ def _get_destination_cell_mapping(self):
                 reason=(_('Unable to determine in which cell '
                           'destination host %s lives.') % self.destination))

-    def _get_request_spec_for_select_destinations(self, attempted_hosts=None):
+    def _get_request_spec_for_select_destinations(self):
         """Builds a RequestSpec that can be passed to select_destinations

         Used when calling the scheduler to pick a destination host for live
         migrating the instance.

-        :param attempted_hosts: List of host names to ignore in the scheduler.
-            This is generally at least seeded with the source host.
         :returns: nova.objects.RequestSpec object
         """
         # NOTE(fwiesel): In order to check the compatibility

@@ -530,14 +531,13 @@ def _get_request_spec_for_select_destinations(self, attempted_hosts=None):

     def _find_destination(self):
         # TODO(johngarbutt) this retry loop should be shared
-        attempted_hosts = [self.source]
-        request_spec = self._get_request_spec_for_select_destinations(
-            attempted_hosts)
+        attempted_nodes = [self.source_node]
+        request_spec = self._get_request_spec_for_select_destinations()

         host = None
         while host is None:
-            self._check_not_over_max_retries(attempted_hosts)
-            request_spec.ignore_hosts = attempted_hosts
+            self._check_not_over_max_retries(attempted_nodes)
+            request_spec.ignore_nodes = attempted_nodes
             try:
                 selection_lists = self.query_client.select_destinations(
                     self.context, request_spec, [self.instance.uuid],

@@ -546,6 +546,7 @@ def _find_destination(self):
                 # only one instance, and we don't care about any alternates.
                 selection = selection_lists[0][0]
                 host = selection.service_host
+                node = selection.nodename
             except messaging.RemoteError as ex:
                 # TODO(ShaoHe Feng) There maybe multi-scheduler, and the
                 # scheduling algorithm is R-R, we can let other scheduler try.

@@ -568,17 +569,18 @@ def _find_destination(self):
                     self.context, self.report_client,
                     self.instance.pci_requests.requests, provider_mapping)
             try:
-                self._check_compatible_with_source_hypervisor(host)
+                self._check_compatible_with_source_hypervisor(host, node)
                 self._call_livem_checks_on_host(host, provider_mapping)
             except (exception.Invalid, exception.MigrationPreCheckError) as e:
-                LOG.debug("Skipping host: %(host)s because: %(e)s",
-                          {"host": host, "e": e})
-                attempted_hosts.append(host)
+                LOG.debug("Skipping node: %(host)s/%(node)s because: %(e)s",
+                          {"host": host, "node": node, "e": e})
+                attempted_nodes.append(node)
                 # The scheduler would have created allocations against the
                 # selected destination host in Placement, so we need to remove
                 # those before moving on.
                 self._remove_host_allocations(selection.compute_node_uuid)
                 host = None
+                node = None
         # TODO(artom) We should probably just return the whole selection object
         # at this point.
         return (selection.service_host, selection.nodename, selection.limits)

@@ -595,11 +597,11 @@ def _remove_host_allocations(self, compute_node_uuid):
         self.report_client.remove_provider_tree_from_instance_allocation(
             self.context, self.instance.uuid, compute_node_uuid)

-    def _check_not_over_max_retries(self, attempted_hosts):
+    def _check_not_over_max_retries(self, attempted_nodes):
         if CONF.migrate_max_retries == -1:
             return

-        retries = len(attempted_hosts) - 1
+        retries = len(attempted_nodes) - 1
         if retries > CONF.migrate_max_retries:
             if self.migration:
                 self.migration.status = 'failed'
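
The conductor task now tracks attempted nodes instead of hosts: the retry list is seeded with the source node, handed to the scheduler as ignore_nodes, and grown after every failed pre-check, so a second node behind the same host can still be tried. A self-contained sketch of that loop, with the scheduler and the pre-checks faked:

    class MigrationPreCheckError(Exception):
        pass


    def find_destination(source_node, select_destination, precheck, max_retries=3):
        attempted_nodes = [source_node]            # seeded with the source node
        selection = None
        while selection is None:
            if len(attempted_nodes) - 1 > max_retries:
                raise MigrationPreCheckError("exceeded max retries")
            # the real code hands this list to the scheduler as ignore_nodes
            selection = select_destination(ignore_nodes=list(attempted_nodes))
            try:
                precheck(selection["host"], selection["node"])
            except MigrationPreCheckError:
                attempted_nodes.append(selection["node"])
                selection = None                   # ask the scheduler again
        return selection


    def pick(ignore_nodes):
        candidates = [{"host": "comp-1", "node": "node-a"},
                      {"host": "comp-1", "node": "node-b"}]
        return next(c for c in candidates if c["node"] not in ignore_nodes)


    def precheck(host, node):
        if node == "node-a":                       # pretend node-a fails pre-checks
            raise MigrationPreCheckError(node)


    print(find_destination("src-node", pick, precheck))
    # {'host': 'comp-1', 'node': 'node-b'}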

nova/notifications/objects/request_spec.py

Lines changed: 4 additions & 1 deletion
@@ -26,10 +26,12 @@ class RequestSpecPayload(base.NotificationPayloadBase):
     # Version 1.1: Add force_hosts, force_nodes, ignore_hosts, image_meta,
     #              instance_group, requested_destination, retry,
     #              scheduler_hints and security_groups fields
-    VERSION = '1.1'
+    # Version 1.2: Add ignore_nodes field
+    VERSION = '1.2'

     SCHEMA = {
         'ignore_hosts': ('request_spec', 'ignore_hosts'),
+        'ignore_nodes': ('request_spec', 'ignore_nodes'),
         'instance_uuid': ('request_spec', 'instance_uuid'),
         'project_id': ('request_spec', 'project_id'),
         'user_id': ('request_spec', 'user_id'),

@@ -47,6 +49,7 @@ class RequestSpecPayload(base.NotificationPayloadBase):
         'force_hosts': fields.StringField(nullable=True),
         'force_nodes': fields.StringField(nullable=True),
         'ignore_hosts': fields.ListOfStringsField(nullable=True),
+        'ignore_nodes': fields.ListOfStringsField(nullable=True),
         'image_meta': fields.ObjectField('ImageMetaPayload', nullable=True),
         'instance_group': fields.ObjectField('ServerGroupPayload',
                                              nullable=True),

nova/objects/request_spec.py

Lines changed: 10 additions & 2 deletions
@@ -73,6 +73,8 @@ class RequestSpec(base.NovaObject):
         'num_instances': fields.IntegerField(default=1),
         # NOTE(alex_xu): This field won't be persisted.
         'ignore_hosts': fields.ListOfStringsField(nullable=True),
+        # NOTE(fabianw): This field won't be persisted
+        'ignore_nodes': fields.ListOfStringsField(nullable=True),
         # NOTE(mriedem): In reality, you can only ever have one
         # host in the force_hosts list. The fact this is a list
         # is a mistake perpetuated over time.

@@ -347,6 +349,7 @@ def from_primitives(cls, context, request_spec, filter_properties):
         spec._from_flavor(flavor)
         # Hydrate now from filter_properties
         spec.ignore_hosts = filter_properties.get('ignore_hosts')
+        spec.ignore_nodes = filter_properties.get('ignore_nodes')
         spec.force_hosts = filter_properties.get('force_hosts')
         spec.force_nodes = filter_properties.get('force_nodes')
         retry = filter_properties.get('retry', {})

@@ -460,6 +463,8 @@ def to_legacy_filter_properties_dict(self):
         filt_props = {}
         if self.obj_attr_is_set('ignore_hosts') and self.ignore_hosts:
             filt_props['ignore_hosts'] = self.ignore_hosts
+        if self.obj_attr_is_set('ignore_nodes') and self.ignore_nodes:
+            filt_props['ignore_nodes'] = self.ignore_nodes
         if self.obj_attr_is_set('force_hosts') and self.force_hosts:
             filt_props['force_hosts'] = self.force_hosts
         if self.obj_attr_is_set('force_nodes') and self.force_nodes:

@@ -527,6 +532,7 @@ def from_components(
         spec_obj._from_instance_pci_requests(pci_requests)
         spec_obj._from_instance_numa_topology(numa_topology)
         spec_obj.ignore_hosts = filter_properties.get('ignore_hosts')
+        spec_obj.ignore_nodes = filter_properties.get('ignore_nodes')
         spec_obj.force_hosts = filter_properties.get('force_hosts')
         spec_obj.force_nodes = filter_properties.get('force_nodes')
         spec_obj._from_retry(filter_properties.get('retry', {}))

@@ -619,10 +625,11 @@ def _from_db_object(context, spec, db_spec):
                 # None and we'll lose what is set (but not persisted) on the
                 # object.
                 continue
-            elif key in ('retry', 'ignore_hosts'):
+            elif key in ('retry', 'ignore_hosts', 'ignore_nodes'):
                 # NOTE(takashin): Do not override the 'retry' or 'ignore_hosts'
                 # fields which are not persisted. They are not lazy-loadable
                 # fields. If they are not set, set None.
+                # NOTE(fabianw): Same with 'ignore_nodes'
                 if not spec.obj_attr_is_set(key):
                     setattr(spec, key, None)
             elif key == "numa_topology":

@@ -704,7 +711,8 @@ def _get_update_primitives(self):
             spec.instance_group.hosts = None
         # NOTE(mriedem): Don't persist these since they are per-request
         for excluded in ('retry', 'requested_destination',
-                         'requested_resources', 'ignore_hosts'):
+                         'requested_resources', 'ignore_hosts',
+                         'ignore_nodes'):
             if excluded in spec and getattr(spec, excluded):
                 setattr(spec, excluded, None)
         # NOTE(stephenfin): Don't persist network metadata since we have
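
Like ignore_hosts, the new ignore_nodes field is per-request only: it is copied in and out of the legacy filter_properties dict but stripped before the spec is written to the database. A rough sketch of that contract, with plain dicts standing in for the RequestSpec object:

    NON_PERSISTED = ('retry', 'requested_destination', 'requested_resources',
                     'ignore_hosts', 'ignore_nodes')


    def to_legacy_filter_properties(spec):
        props = {}
        for key in ('ignore_hosts', 'ignore_nodes', 'force_hosts', 'force_nodes'):
            if spec.get(key):
                props[key] = spec[key]
        return props


    def update_primitives(spec):
        # per-request data is dropped before persisting
        return {k: v for k, v in spec.items() if k not in NON_PERSISTED}


    spec = {'instance_uuid': 'd5e6a7b7-80e5-4166-85a3-cd6115201082',
            'ignore_hosts': None,
            'ignore_nodes': ['node-a'],
            'force_hosts': None}
    print(to_legacy_filter_properties(spec))  # {'ignore_nodes': ['node-a']}
    print(update_primitives(spec))            # ignore_nodes is gone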

nova/scheduler/host_manager.py

Lines changed: 17 additions & 1 deletion
@@ -49,6 +49,7 @@

 class ReadOnlyDict(IterableUserDict):
     """A read-only dict."""
+
     def __init__(self, source=None):
         self.data = {}
         if source:

@@ -497,6 +498,16 @@ def _strip_ignore_hosts(host_map, hosts_to_ignore):
             ignored_hosts_str = ', '.join(ignored_hosts)
             LOG.info('Host filter ignoring hosts: %s', ignored_hosts_str)

+        def _strip_ignore_nodes(host_map, nodes_to_ignore):
+            ignored_nodes = []
+            for node in nodes_to_ignore:
+                for (hostname, nodename) in list(host_map.keys()):
+                    if node.lower() == nodename.lower():
+                        del host_map[(hostname, nodename)]
+                        ignored_nodes.append(node)
+            ignored_nodes_str = ', '.join(ignored_nodes)
+            LOG.info('Host filter ignoring nodes: %s', ignored_nodes_str)
+
         def _match_forced_hosts(host_map, hosts_to_force):
             forced_hosts = []
             lowered_hosts_to_force = [host.lower() for host in hosts_to_force]

@@ -567,6 +578,7 @@ def _get_hosts_matching_request(hosts, requested_destination):
             return iter(requested_nodes)

         ignore_hosts = spec_obj.ignore_hosts or []
+        ignore_nodes = spec_obj.ignore_nodes or []
         force_hosts = spec_obj.force_hosts or []
         force_nodes = spec_obj.force_nodes or []
         requested_node = spec_obj.requested_destination

@@ -576,14 +588,18 @@ def _get_hosts_matching_request(hosts, requested_destination):
         # possible to any requested destination nodes before passing the
         # list to the filters
         hosts = _get_hosts_matching_request(hosts, requested_node)
-        if ignore_hosts or force_hosts or force_nodes:
+        if ignore_hosts or ignore_nodes or force_hosts or force_nodes:
             # NOTE(deva): we can't assume "host" is unique because
             # one host may have many nodes.
             name_to_cls_map = {(x.host, x.nodename): x for x in hosts}
             if ignore_hosts:
                 _strip_ignore_hosts(name_to_cls_map, ignore_hosts)
                 if not name_to_cls_map:
                     return []
+            if ignore_nodes:
+                _strip_ignore_nodes(name_to_cls_map, ignore_nodes)
+                if not name_to_cls_map:
+                    return []
             # NOTE(deva): allow force_hosts and force_nodes independently
             if force_hosts:
                 _match_forced_hosts(name_to_cls_map, force_hosts)
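
The candidate map in get_filtered_hosts() is keyed by (host, nodename), so the new helper can drop individual nodes while _strip_ignore_hosts keeps dropping every node of a host. A standalone copy of the filtering logic shown above:

    def strip_ignore_nodes(host_map, nodes_to_ignore):
        ignored_nodes = []
        for node in nodes_to_ignore:
            for (hostname, nodename) in list(host_map.keys()):
                if node.lower() == nodename.lower():
                    del host_map[(hostname, nodename)]
                    ignored_nodes.append(node)
        return ignored_nodes


    host_map = {("comp-1", "node-a"): "state-a",
                ("comp-1", "node-b"): "state-b",
                ("comp-2", "node-c"): "state-c"}
    strip_ignore_nodes(host_map, ["NODE-B"])   # matching is case-insensitive
    print(sorted(host_map))                    # [('comp-1', 'node-a'), ('comp-2', 'node-c')]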

nova/tests/unit/notifications/objects/test_notification.py

Lines changed: 1 addition & 1 deletion
@@ -427,7 +427,7 @@ def test_payload_is_not_generated_if_notification_format_is_unversioned(
         'MetricsNotification': '1.0-a73147b93b520ff0061865849d3dfa56',
         'MetricsPayload': '1.0-65c69b15b4de5a8c01971cb5bb9ab650',
         'NotificationPublisher': '2.2-ff8ef16673817ca7a3ea69c689e260c6',
-        'RequestSpecPayload': '1.1-64d30723a2e381d0cd6a16a877002c64',
+        'RequestSpecPayload': '1.2-6e4978f842a19991871904f126b97ecf',
         'SchedulerRetriesPayload': '1.0-03a07d09575ef52cced5b1b24301d0b4',
         'SelectDestinationsNotification': '1.0-a73147b93b520ff0061865849d3dfa56',
         'ServerGroupNotification': '1.0-a73147b93b520ff0061865849d3dfa56',

nova/virt/vmwareapi/driver.py

Lines changed: 2 additions & 1 deletion
@@ -178,7 +178,6 @@ def __init__(self, virtapi, scheme="https"):
                                            self._nodename,
                                            self._cluster_ref,
                                            self._datastore_regex)
-        self._vc_state,
         self._vmops = vmops.VMwareVMOps(self._session,
                                         virtapi,
                                         self._volumeops,

@@ -499,6 +498,8 @@ def get_available_nodes(self, refresh=False):
         if CONF.vmware.hypervisor_mode == 'cluster':
             return [self._nodename]

+        return hosts.keys()
+
     def update_provider_tree(self, provider_tree, nodename, allocations=None):
         """Update a ProviderTree object with current resource provider,
         inventory information and CPU traits.
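
A rough sketch of what the amended get_available_nodes() appears to aim for, assuming `hosts` is a mapping of ESXi host names collected earlier in the method (that part is not visible in this hunk): in 'cluster' mode the whole cluster stays a single node, otherwise each host is reported as its own node.

    def get_available_nodes(hypervisor_mode, cluster_nodename, hosts):
        if hypervisor_mode == 'cluster':
            return [cluster_nodename]          # one node representing the cluster
        return list(hosts.keys())              # one node per ESXi host


    hosts = {'esxi-01': {}, 'esxi-02': {}}
    print(get_available_nodes('cluster', 'domain-c100', hosts))  # ['domain-c100']
    print(get_available_nodes('host', 'domain-c100', hosts))     # ['esxi-01', 'esxi-02']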
