Thanks to everyone who supports this blog; discussion and exchange are welcome. Given limited time and ability, mistakes are unavoidable, and corrections are appreciated!
If you repost this article, please keep the original author's blog information.
For discussion, feel free to leave a comment on the blog.
Part One: resize summary
The summary is placed up front; for the detailed analysis, see the steps that follow.
1、resize and migrate use the same underlying interface. If the frontend passes in a new flavor it is a resize, and that new flavor is handed down; a migration hands down the instance's own, unchanged flavor. The lower layers then run the same logic on whatever flavor they receive, so the only difference between resize and migrate is whether the instance's flavor changes while it is being moved.
2、resize shares its implementation with cold migration, so the instance is shut down during the move (note: it is not shut down during confirm_resize). In essence, once the resize itself completes the instance has already been moved and can serve traffic again; the later confirm_resize merely cleans up the migration data (and once that cleanup is done, revert_resize is no longer possible). (Note: a live migration, by contrast, needs no shutdown at all.) A client-side sketch of the resize/confirm/revert workflow is given right after this summary.
3、After a resize/migrate, a record is added to the migrations table and to the instance_system_metadata table (recording the old and the new flavor); for a resize, the flavor stored in the instances table is updated as well.
4、The configuration option allow_resize_to_same_host controls whether an instance may be resized onto its current host; it defaults to False.
5、If resize_confirm_window > 0 and the user has not confirmed the resize within resize_confirm_window seconds, the system confirms it automatically.
6、resize touches many areas (the underlying network, disk migration, the scheduling algorithm, and so on), so the flow is long and intricate; this post therefore only walks through the key logic.
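To make points 1 and 2 concrete, here is a minimal client-side sketch of the resize / confirm / revert workflow. It assumes the Havana-era python-novaclient; the credentials, endpoint, and the server/flavor names are placeholders.

from novaclient.v1_1 import client

# Placeholder credentials and endpoint; substitute real values.
nova = client.Client('USERNAME', 'PASSWORD', 'PROJECT',
                     'http://keystone:5000/v2.0')

server = nova.servers.find(name='my-vm')
flavor = nova.flavors.find(name='m1.large')

# Kick off the resize; the instance passes through RESIZE_PREP and,
# once finished, shows status VERIFY_RESIZE on the target host.
nova.servers.resize(server, flavor)

# After verifying the new instance, confirm the resize (this cleans up
# the migration data, after which it can no longer be reverted) ...
nova.servers.confirm_resize(server)
# ... or roll back instead:
# nova.servers.revert_resize(server)

If the user never confirms, the auto-confirm behaviour from point 5 (resize_confirm_window) eventually does it for them.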
Part Two: detailed analysis of the resize code
1、/nova/api/openstack/compute/servers.py
@wsgi.response(202)
@wsgi.serializers(xml=FullServerTemplate)
@wsgi.deserializers(xml=ActionDeserializer)
@wsgi.action('resize')
def _action_resize(self, req, id, body):
    """Resizes a given instance to the flavor size requested."""
    try:
        flavor_ref = str(body["resize"]["flavorRef"])  # get the target flavor
        if not flavor_ref:
            msg = _("Resize request has invalid 'flavorRef' attribute.")
            raise exc.HTTPBadRequest(explanation=msg)
    except (KeyError, TypeError):
        msg = _("Resize requests require 'flavorRef' attribute.")
        raise exc.HTTPBadRequest(explanation=msg)

    kwargs = {}
    if 'auto_disk_config' in body['resize']:
        kwargs['auto_disk_config'] = body['resize']['auto_disk_config']

    return self._resize(req, id, flavor_ref, **kwargs)  # continue to step 2
2、 /nova/api/openstack/compute/servers.py
def _resize(self, req, instance_id, flavor_id, **kwargs):
    """Begin the resize process with given instance/flavor."""
    context = req.environ["nova.context"]
    instance = self._get_server(context, req, instance_id)  # load the instance record from the database
    try:
        # pass in the instance and the flavor_id to resize to
        self.compute_api.resize(context, instance, flavor_id, **kwargs)  # continue to step 3
    except exception.QuotaError as error:
        raise exc.HTTPRequestEntityTooLarge(
            explanation=error.format_message(),
            headers={'Retry-After': 0})
    except exception.FlavorNotFound:
        msg = _("Unable to locate requested flavor.")
        raise exc.HTTPBadRequest(explanation=msg)
    except exception.CannotResizeToSameFlavor:
        msg = _("Resize requires a flavor change.")
        raise exc.HTTPBadRequest(explanation=msg)
    except exception.InstanceIsLocked as e:
        raise exc.HTTPConflict(explanation=e.format_message())
    except exception.InstanceInvalidState as state_error:
        common.raise_http_conflict_for_instance_invalid_state(state_error,
                                                              'resize')
    except exception.ImageNotAuthorized:
        msg = _("You are not authorized to access the image "
                "the instance was started with.")
        raise exc.HTTPUnauthorized(explanation=msg)
    except exception.ImageNotFound:
        msg = _("Image that the instance was started "
                "with could not be found.")
        raise exc.HTTPBadRequest(explanation=msg)
    except exception.Invalid:
        msg = _("Invalid instance image.")
        raise exc.HTTPBadRequest(explanation=msg)

    return webob.Response(status_int=202)
3、/nova/compute/api.py
@wrap_check_policy
@check_instance_lock
@check_instance_cell
@check_instance_state(vm_state=[vm_states.ACTIVE, vm_states.STOPPED],
                      task_state=[None])
def resize(self, context, instance, flavor_id=None,
           **extra_instance_updates):
    """Resize (ie, migrate) a running instance.

    If flavor_id is None, the process is considered a migration, keeping
    the original flavor_id. If flavor_id is not None, the instance should
    be migrated to a new host and resized to the new flavor_id.
    """
    self._check_auto_disk_config(instance, **extra_instance_updates)

    current_instance_type = flavors.extract_flavor(instance)
    # If flavor_id is not provided, only migrate the instance.
    # If the caller passed a flavor_id, new_instance_type is that flavor;
    # otherwise it stays the instance's own flavor (plain migration).
    if not flavor_id:
        LOG.debug(_("flavor_id is None. Assuming migration."),
                  instance=instance)
        new_instance_type = current_instance_type
    else:
        new_instance_type = flavors.get_flavor_by_flavor_id(
            flavor_id, read_deleted="no")

    current_instance_type_name = current_instance_type['name']
    new_instance_type_name = new_instance_type['name']
    LOG.debug(_("Old instance type %(current_instance_type_name)s, "
                " new instance type %(new_instance_type_name)s"),
              {'current_instance_type_name': current_instance_type_name,
               'new_instance_type_name': new_instance_type_name},
              instance=instance)

    if not new_instance_type:
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    same_instance_type = (current_instance_type['id'] ==
                          new_instance_type['id'])

    # NOTE(sirp): We don't want to force a customer to change their flavor
    # when Ops is migrating off of a failed host.
    if not same_instance_type and new_instance_type.get('disabled'):
        raise exception.FlavorNotFound(flavor_id=flavor_id)

    if same_instance_type and flavor_id and self.cell_type != 'compute':
        raise exception.CannotResizeToSameFlavor()

    # ensure there is sufficient headroom for upsizes
    deltas = self._upsize_quota_delta(context, new_instance_type,
                                      current_instance_type)
    try:
        # reserve the quota delta for this resize
        project_id, user_id = quotas_obj.ids_from_instance(context,
                                                           instance)
        quotas = self._reserve_quota_delta(context, deltas,
                                           project_id=project_id)
    except exception.OverQuota as exc:
        quotas = exc.kwargs['quotas']
        overs = exc.kwargs['overs']
        headroom = exc.kwargs['headroom']

        resource = overs[0]
        used = quotas[resource] - headroom[resource]
        total_allowed = used + headroom[resource]
        overs = ','.join(overs)
        LOG.warn(_("%(overs)s quota exceeded for %(pid)s,"
                   " tried to resize instance."),
                 {'overs': overs, 'pid': context.project_id})
        raise exception.TooManyInstances(overs=overs,
                                         req=deltas[resource],
                                         used=used, allowed=total_allowed,
                                         resource=resource)

    # set the instance's task_state to RESIZE_PREP
    instance.task_state = task_states.RESIZE_PREP
    instance.progress = 0
    instance.update(extra_instance_updates)
    instance.save(expected_task_state=[None])  # persist the new state to the database

    filter_properties = {'ignore_hosts': []}  # hosts the scheduler should ignore

    # If allow_resize_to_same_host is False in nova.conf, add the instance's
    # current host to ignore_hosts so the scheduler will not place the
    # resized instance back onto the same machine.
    if not CONF.allow_resize_to_same_host:
        filter_properties['ignore_hosts'].append(instance['host'])

    # Here when flavor_id is None, the process is considered as migrate.
    # Same idea as above, but for plain migrations.
    if (not flavor_id and not CONF.allow_migrate_to_same_host):
        filter_properties['ignore_hosts'].append(instance['host'])

    if self.cell_type == 'api':
        # Commit reservations early and create migration record.
        self._resize_cells_support(context, quotas, instance,
                                   current_instance_type,
                                   new_instance_type)

    # record the action in the instance_actions table
    self._record_action_start(context, instance, instance_actions.RESIZE)

    scheduler_hint = {'filter_properties': filter_properties}  # hints for the later scheduling step

    self.compute_task_api.resize_instance(context, instance,  # continue to step 4
            extra_instance_updates, scheduler_hint=scheduler_hint,
            flavor=new_instance_type,
            reservations=quotas.reservations or [])
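A side note on the quota handling above: the idea behind _upsize_quota_delta can be shown with a simplified, hypothetical helper (not the actual Nova code). Only the increase in cores and RAM between the two flavors has to be reserved against quota; a downsize reserves nothing.

def upsize_quota_delta(new_flavor, old_flavor):
    # Hypothetical illustration: compute how much extra quota the new
    # flavor needs compared to the old one.
    deltas = {}
    for resource, key in (('cores', 'vcpus'), ('ram', 'memory_mb')):
        delta = new_flavor[key] - old_flavor[key]
        if delta > 0:
            deltas[resource] = delta
    return deltas

# Resizing m1.small (1 vcpu, 2048 MB) to m1.medium (2 vcpu, 4096 MB):
print(upsize_quota_delta({'vcpus': 2, 'memory_mb': 4096},
                         {'vcpus': 1, 'memory_mb': 2048}))
# -> {'cores': 1, 'ram': 2048}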
Stepping into the code shows that compute_task_api above is conductor.ComputeTaskAPI().
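For reference, this is roughly what that binding looks like in /nova/compute/api.py (paraphrased from the Havana code, so details may differ slightly):

@property
def compute_task_api(self):
    if self._compute_task_api is None:
        # Imported lazily here to avoid a circular import between
        # nova.compute and nova.conductor.
        from nova import conductor
        self._compute_task_api = conductor.ComputeTaskAPI()
    return self._compute_task_api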
4、/nova/conductor/api.py
def resize_instance(self, context, instance, extra_instance_updates,
                    scheduler_hint, flavor, reservations):
    # NOTE(comstud): 'extra_instance_updates' is not used here but is
    # needed for compatibility with the cells_rpcapi version of this
    # method.
    self.conductor_compute_rpcapi.migrate_server(  # continue to step 5
        context, instance, scheduler_hint, False, False,
        flavor, None, None, reservations)
5、/nova/conductor/rpcapi.py
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit,
                   reservations=None):
    # live == False here (passed in step 4), so this is not a live migration
    if self.client.can_send_version('1.6'):
        version = '1.6'
    else:
        instance = jsonutils.to_primitive(
            objects_base.obj_to_primitive(instance))
        version = '1.4'
    flavor_p = jsonutils.to_primitive(flavor)
    cctxt = self.client.prepare(version=version)
    return cctxt.call(context, 'migrate_server',  # RPC call, continue to step 6
                      instance=instance, scheduler_hint=scheduler_hint,
                      live=live, rebuild=rebuild, flavor=flavor_p,
                      block_migration=block_migration,
                      disk_over_commit=disk_over_commit,
                      reservations=reservations)
6、/nova/conductor/manager.py
@messaging.expected_exceptions(exception.NoValidHost,
                               exception.ComputeServiceUnavailable,
                               exception.InvalidHypervisorType,
                               exception.UnableToMigrateToSelf,
                               exception.DestinationHypervisorTooOld,
                               exception.InvalidLocalStorage,
                               exception.InvalidSharedStorage,
                               exception.MigrationPreCheckError)
def migrate_server(self, context, instance, scheduler_hint, live, rebuild,
                   flavor, block_migration, disk_over_commit,
                   reservations=None):
    if instance and not isinstance(instance, instance_obj.Instance):
        # NOTE(danms): Until v2 of the RPC API, we need to tolerate
        # old-world instance objects here
        attrs = ['metadata', 'system_metadata', 'info_cache',
                 'security_groups']
        instance = instance_obj.Instance._from_db_object(
            context, instance_obj.Instance(), instance,
            expected_attrs=attrs)

    if live and not rebuild and not flavor:
        # live migration: not a rebuild and no flavor passed (i.e. not a resize)
        self._live_migrate(context, instance, scheduler_hint,
                           block_migration, disk_over_commit)
    elif not live and not rebuild and flavor:
        # resize or cold migration
        instance_uuid = instance['uuid']
        with compute_utils.EventReporter(context, self.db,
                                         'cold_migrate', instance_uuid):
            self._cold_migrate(context, instance, flavor,  # continue to step 7
                               scheduler_hint['filter_properties'],
                               reservations)
    else:
        raise NotImplementedError()
7、/nova/conductor/manager.py
def _cold_migrate(self, context, instance, flavor, filter_properties,
                  reservations):
    image_ref = instance.image_ref  # get the image
    image = compute_utils.get_image_metadata(
        context, self.image_service, image_ref, instance)

    request_spec = scheduler_utils.build_request_spec(
        context, image, [instance], instance_type=flavor)

    quotas = quotas_obj.Quotas.from_reservations(context,
                                                 reservations,
                                                 instance=instance)
    try:
        # key step: ask the scheduler to pick suitable hosts
        hosts = self.scheduler_rpcapi.select_destinations(  # continue to step 7-1
                context, request_spec, filter_properties)
        # take the first host returned (the one with the highest weight)
        host_state = hosts[0]
    except exception.NoValidHost as ex:
        vm_state = instance['vm_state']
        if not vm_state:
            vm_state = vm_states.ACTIVE
        updates = {'vm_state': vm_state, 'task_state': None}
        self._set_vm_state_and_notify(context, 'migrate_server',
                                      updates, ex, request_spec)
        quotas.rollback()  # roll back on failure (not covered further in this post)

        LOG.warning(_("No valid host found for cold migrate"),
                    instance=instance)
        return

    try:
        scheduler_utils.populate_filter_properties(filter_properties,
                                                   host_state)
        # context is not serializable
        filter_properties.pop('context', None)

        # TODO(timello): originally, instance_type in request_spec
        # on compute.api.resize does not have 'extra_specs', so we
        # remove it for now to keep tests backward compatibility.
        request_spec['instance_type'].pop('extra_specs')

        (host, node) = (host_state['host'], host_state['nodename'])
        self.compute_rpcapi.prep_resize(  # prepare the resize, continue to step 8
            context, image, instance,
            flavor, host,
            reservations, request_spec=request_spec,
            filter_properties=filter_properties, node=node)
    except Exception as ex:
        with excutils.save_and_reraise_exception():
            updates = {'vm_state': instance['vm_state'],
                       'task_state': None}
            self._set_vm_state_and_notify(context, 'migrate_server',
                                          updates, ex, request_spec)
            quotas.rollback()  # roll back on failure (not covered further in this post)
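For orientation in the scheduler steps that follow, the request_spec built by scheduler_utils.build_request_spec is essentially a dict. Judging from the fields the scheduler code in step 7-4 reads out of it, it looks roughly like the sketch below; the field contents shown are abbreviated and hypothetical, not the exact Nova structure.

# Rough shape of request_spec, inferred from how _schedule() accesses it.
request_spec = {
    'image': {'id': 'image-uuid', 'properties': {}},   # image metadata fetched above
    'instance_properties': {'uuid': 'instance-uuid'},  # the instance's record fields (abridged)
    'instance_type': {'id': 42, 'name': 'm1.large'},   # the target flavor passed down (abridged)
    'num_instances': 1,                # a resize / cold migrate moves exactly one instance
    'instance_uuids': ['instance-uuid'],
}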
7-1、/nova/scheduler/rpcapi.py

def select_destinations(self, ctxt, request_spec, filter_properties):
    cctxt = self.client.prepare()
    return cctxt.call(ctxt, 'select_destinations',  # RPC call, continue to step 7-2
                      request_spec=request_spec,
                      filter_properties=filter_properties)
7-2、/nova/scheduler/manager.py

@messaging.expected_exceptions(exception.NoValidHost)
def select_destinations(self, context, request_spec, filter_properties):
    """Returns destinations(s) best suited for this request_spec and
    filter_properties.

    The result should be a list of dicts with 'host', 'nodename' and
    'limits' as keys.
    """
    dests = self.driver.select_destinations(context, request_spec,  # continue to step 7-3
                                            filter_properties)
    return jsonutils.to_primitive(dests)
Here self.driver is the scheduler driver loaded in /nova/scheduler/manager.py from the scheduler_driver configuration option, whose default is the filter scheduler:

scheduler_driver = CONF.scheduler_driver
self.driver = importutils.import_object(scheduler_driver)

cfg.StrOpt('scheduler_driver',
           default='nova.scheduler.filter_scheduler.FilterScheduler',
           help='Default driver to use for the scheduler'),
7-3、/nova/scheduler/filter_scheduler.py

def select_destinations(self, context, request_spec, filter_properties):
    """Selects a filtered set of hosts and nodes."""
    num_instances = request_spec['num_instances']
    instance_uuids = request_spec.get('instance_uuids')
    selected_hosts = self._schedule(context, request_spec,  # continue to step 7-4
                                    filter_properties, instance_uuids)

    # Couldn't fulfill the request_spec
    if len(selected_hosts) < num_instances:
        raise exception.NoValidHost(reason='')

    dests = [dict(host=host.obj.host, nodename=host.obj.nodename,
                  limits=host.obj.limits) for host in selected_hosts]
    return dests
7-4、/nova/scheduler/filter_scheduler.py
The code below does the actual scheduling and picks the most suitable selected_hosts; a fuller walkthrough of the scheduler is given in a separate blog post, so the scheduling code is not analysed in more detail here.
def _schedule(self, context, request_spec, filter_properties,
              instance_uuids=None):
    """Returns a list of hosts that meet the required specs,
    ordered by their fitness.
    """
    elevated = context.elevated()
    instance_properties = request_spec['instance_properties']
    instance_type = request_spec.get("instance_type", None)

    update_group_hosts = self._setup_instance_group(context,
                                                    filter_properties)

    config_options = self._get_configuration_options()

    # check retry policy.  Rather ugly use of instance_uuids[0]...
    # but if we've exceeded max retries... then we really only
    # have a single instance.
    properties = instance_properties.copy()
    if instance_uuids:
        properties['uuid'] = instance_uuids[0]
    self._populate_retry(filter_properties, properties)

    filter_properties.update({'context': context,
                              'request_spec': request_spec,
                              'config_options': config_options,
                              'instance_type': instance_type})

    self.populate_filter_properties(request_spec,
                                    filter_properties)

    # Find our local list of acceptable hosts by repeatedly
    # filtering and weighing our options. Each time we choose a
    # host, we virtually consume resources on it so subsequent
    # selections can adjust accordingly.

    # Note: remember, we are using an iterator here. So only
    # traverse this list once. This can bite you if the hosts
    # are being scanned in a filter or weighing function.
    hosts = self._get_all_host_states(elevated)

    selected_hosts = []
    if instance_uuids:
        num_instances = len(instance_uuids)
    else:
        num_instances = request_spec.get('num_instances', 1)
    for num in xrange(num_instances):
        # Filter local hosts based on requirements ...
        hosts = self.host_manager.get_filtered_hosts(hosts,
                filter_properties, index=num)
        if not hosts:
            # Can't get any more locally.
            break

        LOG.debug(_("Filtered %(hosts)s"), {'hosts': hosts})

        weighed_hosts = self.host_manager.get_weighed_hosts(hosts,
                filter_properties)

        LOG.debug(_("Weighed %(hosts)s"), {'hosts': weighed_hosts})

        scheduler_host_subset_size = CONF.scheduler_host_subset_size
        if scheduler_host_subset_size > len(weighed_hosts):
            scheduler_host_subset_size = len(weighed_hosts)
        if scheduler_host_subset_size < 1:
            scheduler_host_subset_size = 1

        chosen_host = random.choice(
            weighed_hosts[0:scheduler_host_subset_size])
        selected_hosts.append(chosen_host)

        # Now consume the resources so the filter/weights
        # will change for the next instance.
        chosen_host.obj.consume_from_instance(instance_properties)
        if update_group_hosts is True:
            filter_properties['group_hosts'].add(chosen_host.obj.host)
    return selected_hosts
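One detail worth highlighting is the host-subset trick at the end of _schedule: instead of always taking the single best-weighted host, the scheduler picks randomly among the top scheduler_host_subset_size hosts, which reduces the chance that several concurrent schedulers pile instances onto the same "best" host. A small standalone sketch (illustrative only, not Nova code):

import random

def pick_host(weighed_hosts, subset_size):
    # weighed_hosts is assumed to be sorted best-first, as returned by
    # get_weighed_hosts(); clamp the subset size to a sane range.
    subset_size = max(1, min(subset_size, len(weighed_hosts)))
    return random.choice(weighed_hosts[:subset_size])

# With scheduler_host_subset_size = 2, one of the two best hosts is chosen:
print(pick_host(['nodeA', 'nodeB', 'nodeC'], 2))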
8、/nova/compute/rpcapi.py
def prep_resize(self, ctxt, image, instance, instance_type, host,
                reservations=None, request_spec=None,
                filter_properties=None, node=None):
    # NOTE(russellb) Havana compat
    version = self._get_compat_version('3.0', '2.43')
    instance_type_p = jsonutils.to_primitive(instance_type)
    image_p = jsonutils.to_primitive(image)
    cctxt = self.client.prepare(server=host, version=version)
    # Asynchronous RPC cast to prep_resize on the chosen compute node;
    # continue to step 9 (covered in the follow-up post).
    cctxt.cast(ctxt, 'prep_resize',
               instance=instance, instance_type=instance_type_p,
               image=image_p, reservations=reservations,
               request_spec=request_spec,
               filter_properties=filter_properties,
               node=node)
Because CSDN limits the length of a post, the remaining steps are continued in a follow-up post: