Skip to content

Commit

Permalink
NAS-133226 / 25.04 / Add ability to backup/restore apps (#15261)
Browse files Browse the repository at this point in the history
* Define basic apps backup service

* Make sure backups dataset gets created

* Add basic validation for creating docker backups

* Take backups of existing metadata/config

* Complete backup functionality for apps

* Add ability to list backups

* Use root model for listing backups in pydantic

* Validate if same existing name of backup is already taken

* No need to take snapshot of docker dataset

* Add ability to restore apps

* Add ability to delete app backup

* Auto-generate app backup on system update

* Minor fixes
  • Loading branch information
sonicaj authored Dec 26, 2024
1 parent f3ab151 commit 6d8b68d
Show file tree
Hide file tree
Showing 5 changed files with 325 additions and 1 deletion.
60 changes: 59 additions & 1 deletion src/middlewared/middlewared/api/v25_04_0/docker.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
from typing import Annotated, Literal

from pydantic import IPvAnyInterface, Field, field_validator, model_validator
from pydantic import IPvAnyInterface, Field, field_validator, model_validator, RootModel

from middlewared.api.base import (
BaseModel, Excluded, excluded_field, ForUpdateMetaclass, NonEmptyString, single_argument_args,
)


# Public names of this API module: the Args/Result model pairs for the docker endpoints,
# including the backup, list-backups, restore-backup and delete-backup models.
__all__ = [
'DockerEntry', 'DockerUpdateArgs', 'DockerUpdateResult', 'DockerStatusArgs', 'DockerStatusResult',
'DockerNvidiaPresentArgs', 'DockerNvidiaPresentResult', 'DockerBackupArgs', 'DockerBackupResult',
'DockerListBackupArgs', 'DockerListBackupResult', 'DockerRestoreBackupArgs', 'DockerRestoreBackupResult',
'DockerDeleteBackupArgs', 'DockerDeleteBackupResult',
]


class AddressPool(BaseModel):
base: IPvAnyInterface
size: Annotated[int, Field(ge=1)]
Expand Down Expand Up @@ -77,3 +85,53 @@ class DockerNvidiaPresentArgs(BaseModel):

class DockerNvidiaPresentResult(BaseModel):
    # Result envelope carrying a single boolean value.
    result: bool


class DockerBackupArgs(BaseModel):
    # Arguments of `docker.backup`.
    # `backup_name` is optional; when it is omitted (None) `docker.backup` falls back to a
    # timestamp-based name.
    backup_name: NonEmptyString | None = None


class DockerBackupResult(BaseModel):
    # Result of `docker.backup`: the name of the backup that was created.
    result: NonEmptyString


class DockerListBackupArgs(BaseModel):
    # `docker.list_backups` takes no arguments.
    pass


class AppInfo(BaseModel):
    # Per-app summary stored with a backup; `docker.list_backups` fills it with the
    # `id`, `name` and `state` keys of each app recorded in the backup's apps state file.
    id: NonEmptyString
    name: NonEmptyString
    state: NonEmptyString


class BackupInfo(BaseModel):
    # Description of a single apps backup as reported by `docker.list_backups`.
    name: NonEmptyString  # backup name (snapshot name without the backup prefix)
    apps: list[AppInfo]  # apps recorded in the backup
    snapshot_name: NonEmptyString  # full ZFS snapshot name backing this backup
    created_on: NonEmptyString  # snapshot creation time rendered as a string
    backup_path: NonEmptyString  # directory holding the backup's files


class DockerBackupInfo(RootModel[dict[str, BackupInfo]]):
    # Root model: a mapping of backup name -> BackupInfo.
    pass


class DockerListBackupResult(BaseModel):
    # Result of `docker.list_backups`: all known backups keyed by backup name.
    result: DockerBackupInfo


class DockerRestoreBackupArgs(BaseModel):
    # Arguments of `docker.restore_backup`: the name of the backup to restore.
    backup_name: NonEmptyString


class DockerRestoreBackupResult(BaseModel):
    # `docker.restore_backup` returns nothing.
    result: None


class DockerDeleteBackupArgs(BaseModel):
    # Arguments of `docker.delete_backup`: the name of the backup to delete.
    backup_name: NonEmptyString


class DockerDeleteBackupResult(BaseModel):
    # `docker.delete_backup` returns nothing.
    result: None
164 changes: 164 additions & 0 deletions src/middlewared/middlewared/plugins/docker/backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import errno
import logging
import os
import shutil
import yaml
from datetime import datetime

from middlewared.api import api_method
from middlewared.api.current import (
DockerBackupArgs, DockerBackupResult, DockerListBackupArgs, DockerListBackupResult,
DockerDeleteBackupArgs, DockerDeleteBackupResult,
)
from middlewared.plugins.apps.ix_apps.path import get_collective_config_path, get_collective_metadata_path
from middlewared.plugins.zfs_.validation_utils import validate_snapshot_name
from middlewared.service import CallError, job, Service

from .state_utils import backup_apps_state_file_path, backup_ds_path, datasets_to_skip_for_snapshot_on_backup
from .utils import BACKUP_NAME_PREFIX, UPDATE_BACKUP_PREFIX


# Uses the fixed 'app_lifecycle' logger name rather than the module name.
logger = logging.getLogger('app_lifecycle')


class DockerService(Service):
    # Backup related endpoints of the docker service: create, list and delete app backups.
    # A backup consists of a directory under `backup_ds_path()` holding copies of the apps
    # metadata/config plus a recursive ZFS snapshot of the docker dataset whose name is
    # `BACKUP_NAME_PREFIX` followed by the backup name.

    class Config:
        cli_namespace = 'app.docker'

    @api_method(DockerBackupArgs, DockerBackupResult, roles=['DOCKER_WRITE'])
    @job(lock='docker_backup')
    def backup(self, job, backup_name):
        """
        Create a backup of existing apps.

        `backup_name` is optional, when it is not specified the current timestamp is used as the name of
        the backup. The name of the created backup is returned.
        """
        self.middleware.call_sync('docker.state.validate')
        docker_config = self.middleware.call_sync('docker.config')
        name = backup_name or datetime.now().strftime('%F_%T')
        if not validate_snapshot_name(f'a@{name}'):
            # The a@ added is just cosmetic as the function requires a complete snapshot name
            # with the dataset name included in it
            raise CallError(f'{name!r} is not a valid snapshot name. It should be a valid ZFS snapshot name')

        snap_name = BACKUP_NAME_PREFIX + name
        if self.middleware.call_sync('zfs.snapshot.query', [['id', '=', f'{docker_config["dataset"]}@{snap_name}']]):
            raise CallError(f'{snap_name!r} snapshot already exists', errno=errno.EEXIST)

        if name in self.list_backups():
            raise CallError(f'Backup with {name!r} already exists', errno=errno.EEXIST)

        backup_base_dir = backup_ds_path()
        os.makedirs(backup_base_dir, exist_ok=True)
        backup_dir = os.path.join(backup_base_dir, name)
        # Deliberately outside of the try block below: if this fails because the directory already
        # exists we must not remove a directory which this call did not create.
        os.makedirs(backup_dir)

        try:
            job.set_progress(10, 'Basic validation complete')

            shutil.copy(get_collective_metadata_path(), os.path.join(backup_dir, 'collective_metadata.yaml'))
            shutil.copy(get_collective_config_path(), os.path.join(backup_dir, 'collective_config.yaml'))

            # State of every app keyed by app name, this is what `list_backups` reads back
            with open(backup_apps_state_file_path(name), 'w') as f:
                f.write(yaml.safe_dump({app['name']: app for app in self.middleware.call_sync('app.query')}))

            with open(os.path.join(backup_dir, 'docker_config.yaml'), 'w') as f:
                f.write(yaml.safe_dump(docker_config))

            job.set_progress(95, 'Taking snapshot of ix-applications')

            self.middleware.call_sync(
                'zettarepl.create_recursive_snapshot_with_exclude', docker_config['dataset'],
                snap_name, datasets_to_skip_for_snapshot_on_backup(docker_config['dataset'])
            )
        except Exception:
            # Do not leave a partially written backup directory behind: without its snapshot it is
            # never listed as a backup, yet it would make a retry with the same name fail.
            shutil.rmtree(backup_dir, ignore_errors=True)
            raise

        job.set_progress(100, f'Backup {name!r} complete')

        return name

    @api_method(DockerListBackupArgs, DockerListBackupResult, roles=['DOCKER_READ'])
    def list_backups(self):
        """
        List existing app backups.

        Backups are returned as a dictionary keyed by backup name. A backup is only reported when both
        its snapshot and a readable apps state file in its backup directory exist.
        """
        docker_config = self.middleware.call_sync('docker.config')
        if not docker_config['pool']:
            return {}

        backups_base_dir = backup_ds_path()
        backups = {}
        snapshots = self.middleware.call_sync(
            'zfs.snapshot.query', [
                ['name', '^', f'{docker_config["dataset"]}@{BACKUP_NAME_PREFIX}']
            ], {'select': ['name']}
        )
        for snapshot in snapshots:
            # The query filter guarantees that the snapshot part starts with BACKUP_NAME_PREFIX
            backup_name = snapshot['name'].split('@', 1)[-1].removeprefix(BACKUP_NAME_PREFIX)
            backup_path = os.path.join(backups_base_dir, backup_name)
            if not os.path.exists(backup_path):
                continue

            try:
                with open(backup_apps_state_file_path(backup_name), 'r') as f:
                    apps = yaml.safe_load(f)
            except (FileNotFoundError, yaml.YAMLError):
                continue

            if not isinstance(apps, dict):
                # An empty or otherwise malformed state file is treated like an unreadable one
                # instead of failing the listing of every other backup
                continue

            backups[backup_name] = {
                'name': backup_name,
                'apps': [{k: app[k] for k in ('id', 'name', 'state')} for app in apps.values()],
                'snapshot_name': snapshot['name'],
                'created_on': str(self.middleware.call_sync(
                    'zfs.snapshot.get_instance', snapshot['name']
                )['properties']['creation']['parsed']),
                'backup_path': backup_path,
            }

        return backups

    @api_method(DockerDeleteBackupArgs, DockerDeleteBackupResult, roles=['DOCKER_WRITE'])
    def delete_backup(self, backup_name):
        """
        Delete `backup_name` app backup.
        """
        self.middleware.call_sync('docker.state.validate')

        backup = self.middleware.call_sync('docker.list_backups').get(backup_name)
        if not backup:
            raise CallError(f'Backup {backup_name!r} does not exist', errno=errno.ENOENT)

        self.middleware.call_sync('zfs.snapshot.delete', backup['snapshot_name'], {'recursive': True})
        # Best effort: errors while removing the backup directory are ignored
        shutil.rmtree(backup['backup_path'], ignore_errors=True)


async def post_system_update_hook(middleware):
    """
    Take an automatic apps backup after a system update.

    Does nothing when docker has no dataset configured. Otherwise the oldest auto-generated backups
    (those whose name starts with `UPDATE_BACKUP_PREFIX`) are deleted until fewer than 3 are left, and
    then a new backup named `UPDATE_BACKUP_PREFIX` + current timestamp is created. Failures are only
    logged, they are never raised to the caller of the hook.
    """
    if not (await middleware.call('docker.config'))['dataset']:
        # If docker is not configured, there is nothing to backup
        logger.debug('Docker is not configured, skipping app\'s backup on system update')
        return

    backups = [
        v for k, v in (await middleware.call('docker.list_backups')).items()
        if k.startswith(UPDATE_BACKUP_PREFIX)
    ]
    # Oldest first so that pop(0) below always removes the oldest auto-generated backup
    backups.sort(key=lambda d: d['created_on'])
    while len(backups) >= 3:
        backup = backups.pop(0)
        try:
            logger.debug('Deleting %r app\'s old auto-generated backup', backup['name'])
            await middleware.call('docker.delete_backup', backup['name'])
        except Exception as e:
            logger.error(
                'Failed to delete %r app backup: %s', backup['name'], e, exc_info=True
            )
            # Stop pruning on the first failure, the new backup is still attempted below
            break

    # UPDATE_BACKUP_PREFIX already ends with a dash, adding another separator here would result in
    # names like 'system-update--<timestamp>'
    backup_job = await middleware.call(
        'docker.backup', f'{UPDATE_BACKUP_PREFIX}{datetime.now().strftime("%F_%T")}'
    )
    await backup_job.wait()
    if backup_job.error:
        logger.error('Failed to backup apps: %s', backup_job.error)


async def setup(middleware):
    """Register `post_system_update_hook` as a synchronous `update.post_update` hook."""
    middleware.register_hook(
        'update.post_update',
        post_system_update_hook,
        sync=True,
    )
84 changes: 84 additions & 0 deletions src/middlewared/middlewared/plugins/docker/restore_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import errno
import logging
import os

from middlewared.api import api_method
from middlewared.api.current import DockerRestoreBackupArgs, DockerRestoreBackupResult
from middlewared.plugins.apps.ix_apps.path import get_installed_app_path
from middlewared.plugins.apps.ix_apps.utils import AppState
from middlewared.service import CallError, job, Service


# Uses the fixed 'app_lifecycle' logger name rather than the module name.
logger = logging.getLogger('app_lifecycle')


class DockerService(Service):
    # Restore endpoint of the docker service.

    class Config:
        cli_namespace = 'app.docker'

    @api_method(DockerRestoreBackupArgs, DockerRestoreBackupResult, roles=['DOCKER_WRITE'])
    @job(lock='docker_restore_backup')
    def restore_backup(self, job, backup_name):
        """
        Restore apps from the `backup_name` backup.

        Docker is stopped, the `docker` child dataset is removed, the docker dataset is rolled back to
        the snapshot of the backup and apps which were running when the backup was taken are redeployed.
        """
        call = self.middleware.call_sync

        backup = call('docker.list_backups').get(backup_name)
        if not backup:
            raise CallError(f'Backup {backup_name!r} not found', errno=errno.ENOENT)

        job.set_progress(10, 'Basic validation complete')

        logger.debug('Restoring backup %r', backup_name)
        call('service.stop', 'docker')
        job.set_progress(20, 'Stopped Docker service')

        docker_config = call('docker.config')
        docker_ds = docker_config['dataset']
        # The `docker` child dataset is removed here and re-created further below
        call('zfs.dataset.delete', os.path.join(docker_ds, 'docker'), {'force': True})

        job.set_progress(25, f'Rolling back to {backup_name!r} backup')
        rollback_options = {
            'force': True,
            'recursive': True,
            'recursive_clones': True,
            'recursive_rollback': True,
        }
        call('zfs.snapshot.rollback', backup['snapshot_name'], rollback_options)

        job.set_progress(30, 'Rolled back snapshots')

        call('docker.setup.create_update_docker_datasets', docker_ds)
        call('docker.fs_manage.mount')

        # Only apps which still exist on disk and were running at backup time get redeployed
        apps_to_start = []
        for app_info in backup['apps']:
            app_id = app_info['id']
            if not os.path.exists(get_installed_app_path(app_id)):
                logger.debug('App %r path not found, skipping restoring', app_id)
            elif app_info['state'] == AppState.RUNNING.name:
                apps_to_start.append(app_id)

        metadata_job = call('app.metadata.generate')
        metadata_job.wait_sync()
        if metadata_job.error:
            raise CallError(f'Failed to generate app metadata: {metadata_job.error}')

        job.set_progress(50, 'Generated metadata for apps')

        call('docker.state.start_service', True)
        job.set_progress(70, 'Started Docker service')

        logger.debug('Starting %r apps', ', '.join(apps_to_start))
        redeploy_args = [[app_name] for app_name in apps_to_start]
        redeploy_job = call('core.bulk', 'app.redeploy', redeploy_args)
        redeploy_job.wait_sync()
        # Not going to raise an error if some app failed to start as that could be true for various apps
        logger.debug('Restore complete')
        job.set_progress(100, f'Restore {backup_name!r} complete')
14 changes: 14 additions & 0 deletions src/middlewared/middlewared/plugins/docker/state_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ def catalog_ds_path() -> str:
return os.path.join(IX_APPS_MOUNT_PATH, CATALOG_DATASET_NAME)


def backup_apps_state_file_path(backup_name: str) -> str:
    """Return the path of the apps state file inside the `backup_name` backup directory."""
    # NOTE(review): the file has a `.json` suffix although docker/backup.py writes and reads it with
    # the YAML safe dumper/loader - confirm before renaming as existing backups use this name.
    backup_dir = os.path.join(backup_ds_path(), backup_name)
    return os.path.join(backup_dir, 'apps_state.json')


def backup_ds_path() -> str:
    """Return the `backups` directory located directly under `IX_APPS_MOUNT_PATH`."""
    backups_dir_name = 'backups'
    return os.path.join(IX_APPS_MOUNT_PATH, backups_dir_name)


def datasets_to_skip_for_snapshot_on_backup(docker_ds: str) -> list[str]:
    """Return the child datasets of `docker_ds` which are excluded when snapshotting for a backup."""
    skipped_children = (CATALOG_DATASET_NAME, 'docker')
    return [os.path.join(docker_ds, child) for child in skipped_children]


def docker_datasets(docker_ds: str) -> typing.List[str]:
return [docker_ds] + [
os.path.join(docker_ds, d) for d in (
Expand Down
4 changes: 4 additions & 0 deletions src/middlewared/middlewared/plugins/docker/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import os


# Prefix of the ZFS snapshot name created for every apps backup; the backup name follows it.
BACKUP_NAME_PREFIX = 'ix-apps-backup-'
# Prefix of the names of backups auto-generated on system update. It already ends with a dash.
UPDATE_BACKUP_PREFIX = 'system-update-'


def applications_ds_name(pool: str) -> str:
    """Return the name of the `ix-apps` dataset located directly under `pool`."""
    dataset_name = 'ix-apps'
    return os.path.join(pool, dataset_name)

0 comments on commit 6d8b68d

Please sign in to comment.