Skip to content

Commit

Permalink
Create backups via scheduler
Browse files Browse the repository at this point in the history
Now all backup-create operations will be processed via the scheduler, as we
already do for volumes. Backup-specific filters will be added in a follow-up patch.

Related blueprint: backup-host-selection-algorigthm

Change-Id: Ie2afb57c4861c41982612e6054767cef43fdb867
  • Loading branch information
e0ne authored and Jay Bryant committed Feb 19, 2020
1 parent 3a73123 commit f0211b5
Show file tree
Hide file tree
Showing 12 changed files with 184 additions and 121 deletions.
14 changes: 3 additions & 11 deletions cinder/backup/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@
import cinder.policy
from cinder import quota
from cinder import quota_utils
from cinder.scheduler import rpcapi as scheduler_rpcapi
import cinder.volume
from cinder.volume import volume_utils

backup_opts = [
cfg.BoolOpt('backup_use_same_host',
Expand All @@ -61,6 +61,7 @@ class API(base.Base):

def __init__(self, db=None):
    """Set up the RPC clients this API delegates to.

    :param db: optional DB driver, passed through to base.Base.
    """
    self.backup_rpcapi = backup_rpcapi.BackupAPI()
    # Backup creation now goes through cinder-scheduler (see create()),
    # so a scheduler RPC client is needed alongside the backup one.
    self.scheduler_rpcapi = scheduler_rpcapi.SchedulerAPI()
    self.volume_api = cinder.volume.API()
    super(API, self).__init__(db)

Expand Down Expand Up @@ -227,10 +228,6 @@ def create(self, context, name, description, volume_id,
raise exception.InvalidVolume(reason=msg)

previous_status = volume['status']
volume_host = volume_utils.extract_host(volume.host, 'host')
availability_zone = availability_zone or volume.availability_zone
host = self._get_available_backup_service_host(volume_host,
availability_zone)

# Reserve a quota before setting volume status and backup status
try:
Expand Down Expand Up @@ -313,8 +310,6 @@ def create(self, context, name, description, volume_id,
'container': container,
'parent_id': parent_id,
'size': volume['size'],
'host': host,
'availability_zone': availability_zone,
'snapshot_id': snapshot_id,
'data_timestamp': data_timestamp,
'parent': parent,
Expand All @@ -334,10 +329,7 @@ def create(self, context, name, description, volume_id,
finally:
QUOTAS.rollback(context, reservations)

# TODO(DuncanT): In future, when we have a generic local attach,
# this can go via the scheduler, which enables
# better load balancing and isolation of services
self.backup_rpcapi.create_backup(context, backup)
self.scheduler_rpcapi.create_backup(context, backup)

return backup

Expand Down
37 changes: 16 additions & 21 deletions cinder/backup/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,6 @@ def __init__(self, *args, **kwargs):
self.driver_name = new_name
self.service = importutils.import_class(self.driver_name)

def _update_backup_error(self, backup, err,
status=fields.BackupStatus.ERROR):
backup.status = status
backup.fail_reason = err
backup.save()

def init_host(self, **kwargs):
"""Run initialization needed for a standalone service."""
ctxt = context.get_admin_context()
Expand Down Expand Up @@ -239,7 +233,7 @@ def _cleanup_one_backup(self, ctxt, backup):
self._cleanup_one_volume(ctxt, volume)

err = 'incomplete backup reset on manager restart'
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
elif backup['status'] == fields.BackupStatus.RESTORING:
LOG.info('Resetting backup %s to '
'available (was restoring).',
Expand Down Expand Up @@ -369,7 +363,7 @@ def create_backup(self, context, backup):
'expected_status': expected_status,
'actual_status': actual_status,
}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidSnapshot(reason=err)
else:
actual_status = volume['status']
Expand All @@ -379,7 +373,7 @@ def create_backup(self, context, backup):
'expected_status': expected_status,
'actual_status': actual_status,
}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidVolume(reason=err)

expected_status = fields.BackupStatus.CREATING
Expand All @@ -390,13 +384,13 @@ def create_backup(self, context, backup):
'expected_status': expected_status,
'actual_status': actual_status,
}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidBackup(reason=err)

try:
if not self.is_working():
err = _('Create backup aborted due to backup service is down.')
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidBackup(reason=err)

backup.service = self.driver_name
Expand All @@ -412,7 +406,7 @@ def create_backup(self, context, backup):
context, volume_id,
{'status': previous_status,
'previous_status': 'error_backing-up'})
self._update_backup_error(backup, six.text_type(err))
volume_utils.update_backup_error(backup, six.text_type(err))

# Restore the original status.
if snapshot_id:
Expand Down Expand Up @@ -559,7 +553,7 @@ def restore_backup(self, context, backup, volume_id):
'%(expected_status)s but got %(actual_status)s.') %
{'expected_status': expected_status,
'actual_status': actual_status})
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
self.db.volume_update(context, volume_id,
{'status': fields.VolumeStatus.ERROR})
raise exception.InvalidBackup(reason=err)
Expand Down Expand Up @@ -718,13 +712,13 @@ def delete_backup(self, context, backup):
'%(expected_status)s but got %(actual_status)s.') \
% {'expected_status': expected_status,
'actual_status': actual_status}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidBackup(reason=err)

if backup.service and not self.is_working():
err = _('Delete backup is aborted due to backup service is down.')
status = fields.BackupStatus.ERROR_DELETING
self._update_backup_error(backup, err, status)
volume_utils.update_backup_error(backup, err, status)
raise exception.InvalidBackup(reason=err)

if not self._is_our_backup(backup):
Expand All @@ -734,7 +728,7 @@ def delete_backup(self, context, backup):
' backup [%(backup_service)s].')\
% {'configured_service': self.driver_name,
'backup_service': backup.service}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.InvalidBackup(reason=err)

if backup.service:
Expand All @@ -743,7 +737,8 @@ def delete_backup(self, context, backup):
backup_service.delete_backup(backup)
except Exception as err:
with excutils.save_and_reraise_exception():
self._update_backup_error(backup, six.text_type(err))
volume_utils.update_backup_error(backup,
six.text_type(err))

# Get reservations
try:
Expand Down Expand Up @@ -874,7 +869,7 @@ def import_record(self,
err = _('Import record failed, cannot find backup '
'service to perform the import. Request service '
'%(service)s.') % {'service': backup_service}
self._update_backup_error(backup, err)
volume_utils.update_backup_error(backup, err)
raise exception.ServiceNotFound(service_id=backup_service)
else:
# Yes...
Expand All @@ -888,7 +883,7 @@ def import_record(self,
backup_service.import_record(backup, driver_options)
except Exception as err:
msg = six.text_type(err)
self._update_backup_error(backup, msg)
volume_utils.update_backup_error(backup, msg)
raise exception.InvalidBackup(reason=msg)

required_import_options = {
Expand All @@ -907,7 +902,7 @@ def import_record(self,
msg = (_('Driver successfully decoded imported backup data, '
'but there are missing fields (%s).') %
', '.join(missing_opts))
self._update_backup_error(backup, msg)
volume_utils.update_backup_error(backup, msg)
raise exception.InvalidBackup(reason=msg)

# Confirm the ID from the record in the DB is the right one
Expand All @@ -916,7 +911,7 @@ def import_record(self,
msg = (_('Trying to import backup metadata from id %(meta_id)s'
' into backup %(id)s.') %
{'meta_id': backup_id, 'id': backup.id})
self._update_backup_error(backup, msg)
volume_utils.update_backup_error(backup, msg)
raise exception.InvalidBackup(reason=msg)

# Overwrite some fields
Expand Down
5 changes: 5 additions & 0 deletions cinder/scheduler/driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,3 +154,8 @@ def get_pools(self, context, filters):
"""Must override schedule method for scheduler to work."""
raise NotImplementedError(_(
"Must implement schedule_get_pools"))

def get_backup_host(self, volume, driver=None):
    """Must override get_backup_host for backup scheduling to work.

    :param volume: volume being backed up (may be None).
    :param driver: optional backup driver name to filter services on.
    :returns: host name of the selected backup service.
    """
    raise NotImplementedError(_(
        "Must implement get_backup_host"))
3 changes: 3 additions & 0 deletions cinder/scheduler/filter_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -576,3 +576,6 @@ def _choose_top_backend_generic_group(self, weighed_backends):
backend_state = top_backend.obj
LOG.debug("Choosing %s", backend_state.backend_id)
return top_backend

def get_backup_host(self, volume, driver=None):
    """Delegate backup host selection to the host manager."""
    manager = self.host_manager
    return manager.get_backup_host(volume, driver)
81 changes: 74 additions & 7 deletions cinder/scheduler/host_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,8 @@

"""Manage backends in the current zone."""

# TODO(smcginnis) update this once six has support for collections.abc
# (https://github.com/benjaminp/six/pull/241) or clean up once we drop py2.7.
try:
from collections.abc import Mapping
except ImportError:
from collections import Mapping
import collections
import random

from oslo_config import cfg
from oslo_log import log as logging
Expand Down Expand Up @@ -71,7 +67,7 @@
LOG = logging.getLogger(__name__)


class ReadOnlyDict(Mapping):
class ReadOnlyDict(collections.Mapping):
"""A read-only dict."""
def __init__(self, source=None):
if source is not None:
Expand Down Expand Up @@ -901,3 +897,74 @@ def _equal_after_convert(self, capability, value):
# If the capability and value are not in the same type,
# we just convert them into string to compare them.
return str(value) == str(capability)

def get_backup_host(self, volume, driver=None):
    """Pick a backup service host for *volume*.

    With no volume, any enabled backup service may be chosen.
    """
    if not volume:
        return self._get_available_backup_service_host(None, None, driver)
    volume_host = volume_utils.extract_host(volume.host, 'host')
    return self._get_available_backup_service_host(
        volume_host, volume.availability_zone, driver)

def _get_any_available_backup_service(self, availability_zone,
driver=None):
"""Get an available backup service host.
Get an available backup service host in the specified
availability zone.
"""
services = [srv for srv in self._list_backup_services(
availability_zone, driver)]
random.shuffle(services)
return services[0] if services else None

def _get_available_backup_service_host(self, host, az, driver=None):
    """Return an appropriate backup service host."""
    if host and CONF.backup_use_same_host:
        # Pin the backup to the volume's own host, if its backup
        # service is enabled and up.
        candidate = host if self._is_backup_service_enabled(az, host) else None
    else:
        candidate = self._get_any_available_backup_service(az, driver)
    if not candidate:
        raise exception.ServiceNotFound(service_id='cinder-backup')
    return candidate

def _list_backup_services(self, availability_zone, driver=None):
"""List all enabled backup services.
:returns: list -- hosts for services that are enabled for backup.
"""
services = []

def _is_good_service(cap, driver, az):
if driver is None and az is None:
return True
match_driver = cap['driver_name'] == driver if driver else True
if match_driver:
if not az:
return True
return cap['availability_zone'] == az
return False

for backend, capabilities in self.backup_service_states.items():
if capabilities['backend_state']:
if _is_good_service(capabilities, driver, availability_zone):
services.append(backend)

return services

def _az_matched(self, service, availability_zone):
return ((not availability_zone) or
service.availability_zone == availability_zone)

def _is_backup_service_enabled(self, availability_zone, host):
    """Check if there is a backup service available."""
    ctxt = cinder_context.get_admin_context()
    services = objects.ServiceList.get_all_by_topic(
        ctxt, constants.BACKUP_TOPIC, disabled=False)
    # A single matching, live service on this host is enough.
    return any(self._az_matched(srv, availability_zone) and
               srv.host == host and srv.is_up
               for srv in services)
13 changes: 13 additions & 0 deletions cinder/scheduler/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
from cinder.scheduler.flows import create_volume
from cinder.scheduler import rpcapi as scheduler_rpcapi
from cinder.volume import rpcapi as volume_rpcapi
from cinder.volume import volume_utils as vol_utils


scheduler_manager_opts = [
Expand Down Expand Up @@ -626,3 +627,15 @@ def work_cleanup(self, context, cleanup_request):

LOG.info('Cleanup requests completed.')
return requested, not_requested

def create_backup(self, context, backup):
    """Select a backup host for *backup* and cast the request to it."""
    source_volume = self.db.volume_get(context, backup.volume_id)
    try:
        backup.host = self.driver.get_backup_host(source_volume)
        backup.save()
        self.backup_api.create_backup(context, backup)
    except exception.ServiceNotFound:
        # NOTE(review): the source volume presumably stays in
        # 'backing-up' on this path — confirm a follow-up restores
        # its previous status.
        msg = "Service not found for creating backup."
        LOG.error(msg)
        vol_utils.update_backup_error(backup, msg)
9 changes: 8 additions & 1 deletion cinder/scheduler/rpcapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,10 @@ class SchedulerAPI(rpc.RPCAPI):
3.9 - Adds create_snapshot method
3.10 - Adds backup_id to create_volume method.
3.11 - Adds manage_existing_snapshot method.
3.12 - Adds create_backup method.
"""

RPC_API_VERSION = '3.11'
RPC_API_VERSION = '3.12'
RPC_DEFAULT_VERSION = '3.0'
TOPIC = constants.SCHEDULER_TOPIC
BINARY = 'cinder-scheduler'
Expand Down Expand Up @@ -260,3 +261,9 @@ def set_log_levels(self, context, service, log_request):
def get_log_levels(self, context, service, log_request):
cctxt = self._get_cctxt(server=service.host, version='3.7')
return cctxt.call(context, 'get_log_levels', log_request=log_request)

@rpc.assert_min_rpc_version('3.12')
def create_backup(self, ctxt, backup):
    """Cast a create_backup request to the scheduler service."""
    client = self._get_cctxt()
    return client.cast(ctxt, 'create_backup', backup=backup)
Loading

0 comments on commit f0211b5

Please sign in to comment.