Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NAS-133226 / 25.04 / Add ability to backup/restore apps #15261

Merged
merged 13 commits into from
Dec 26, 2024
60 changes: 59 additions & 1 deletion src/middlewared/middlewared/api/v25_04_0/docker.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
from typing import Annotated, Literal

from pydantic import IPvAnyInterface, Field, field_validator, model_validator
from pydantic import IPvAnyInterface, Field, field_validator, model_validator, RootModel

from middlewared.api.base import (
BaseModel, Excluded, excluded_field, ForUpdateMetaclass, NonEmptyString, single_argument_args,
)


# Names exported by this module: the Args/Result model pairs for the docker
# API methods, including the backup, list-backups, restore and delete pairs.
__all__ = [
'DockerEntry', 'DockerUpdateArgs', 'DockerUpdateResult', 'DockerStatusArgs', 'DockerStatusResult',
'DockerNvidiaPresentArgs', 'DockerNvidiaPresentResult', 'DockerBackupArgs', 'DockerBackupResult',
'DockerListBackupArgs', 'DockerListBackupResult', 'DockerRestoreBackupArgs', 'DockerRestoreBackupResult',
'DockerDeleteBackupArgs', 'DockerDeleteBackupResult',
]


class AddressPool(BaseModel):
base: IPvAnyInterface
size: Annotated[int, Field(ge=1)]
Expand Down Expand Up @@ -77,3 +85,53 @@ class DockerNvidiaPresentArgs(BaseModel):

class DockerNvidiaPresentResult(BaseModel):
    # Result wrapper carrying a single boolean flag.
    # NOTE(review): presumably True when an NVIDIA GPU is present - confirm against the method.
    result: bool


class DockerBackupArgs(BaseModel):
    # Optional name for the new backup; when it is omitted (None) the backup
    # job falls back to a timestamp-based name.
    backup_name: NonEmptyString | None = None


class DockerBackupResult(BaseModel):
    # Name of the backup that was created.
    result: NonEmptyString


class DockerListBackupArgs(BaseModel):
    # Listing backups takes no arguments, hence the empty model.
    pass


class AppInfo(BaseModel):
    # Per-app summary embedded in a backup entry: only these three keys of the
    # recorded app state are exposed.
    id: NonEmptyString
    name: NonEmptyString
    # Compared against `AppState` member names when a backup is restored.
    state: NonEmptyString


class BackupInfo(BaseModel):
    # Description of a single apps backup as returned by the backup listing.

    # Backup name (the snapshot name without dataset and backup prefix).
    name: NonEmptyString
    # Apps recorded in the backup's state file.
    apps: list[AppInfo]
    # Full `dataset@snapshot` name of the ZFS snapshot backing this backup.
    snapshot_name: NonEmptyString
    # Stringified creation time of the snapshot.
    created_on: NonEmptyString
    # Directory that holds the backup's metadata files.
    backup_path: NonEmptyString


class DockerBackupInfo(RootModel[dict[str, BackupInfo]]):
    # Root model: a mapping of backup name -> BackupInfo, with no extra fields.
    pass


class DockerListBackupResult(BaseModel):
    # All known backups, keyed by backup name.
    result: DockerBackupInfo


class DockerRestoreBackupArgs(BaseModel):
    # Name of the existing backup to restore (required, non-empty).
    backup_name: NonEmptyString


class DockerRestoreBackupResult(BaseModel):
    # The restore method returns nothing on success.
    result: None


class DockerDeleteBackupArgs(BaseModel):
    # Name of the existing backup to delete (required, non-empty).
    backup_name: NonEmptyString


class DockerDeleteBackupResult(BaseModel):
    # The delete method returns nothing on success.
    result: None
164 changes: 164 additions & 0 deletions src/middlewared/middlewared/plugins/docker/backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
import errno
import logging
import os
import shutil
import yaml
from datetime import datetime

from middlewared.api import api_method
from middlewared.api.current import (
DockerBackupArgs, DockerBackupResult, DockerListBackupArgs, DockerListBackupResult,
DockerDeleteBackupArgs, DockerDeleteBackupResult,
)
from middlewared.plugins.apps.ix_apps.path import get_collective_config_path, get_collective_metadata_path
from middlewared.plugins.zfs_.validation_utils import validate_snapshot_name
from middlewared.service import CallError, job, Service

from .state_utils import backup_apps_state_file_path, backup_ds_path, datasets_to_skip_for_snapshot_on_backup
from .utils import BACKUP_NAME_PREFIX, UPDATE_BACKUP_PREFIX


logger = logging.getLogger('app_lifecycle')


class DockerService(Service):
    # Backup-related methods of the docker service: create, list and delete app backups.
    # A backup consists of a metadata directory under the backups path plus a
    # recursive ZFS snapshot of the docker dataset named BACKUP_NAME_PREFIX + <name>.

    class Config:
        cli_namespace = 'app.docker'

    @api_method(DockerBackupArgs, DockerBackupResult, roles=['DOCKER_WRITE'])
    @job(lock='docker_backup')
    def backup(self, job, backup_name):
        """
        Create a backup of existing apps.

        `backup_name` is optional; when it is not specified a name based on the current
        timestamp is used. The name of the created backup is returned.
        """
        self.middleware.call_sync('docker.state.validate')
        docker_config = self.middleware.call_sync('docker.config')
        name = backup_name or datetime.now().strftime('%F_%T')
        if not validate_snapshot_name(f'a@{name}'):
            # The a@ added is just cosmetic as the function requires a complete snapshot name
            # with the dataset name included in it
            raise CallError(f'{name!r} is not a valid snapshot name. It should be a valid ZFS snapshot name')

        snap_name = BACKUP_NAME_PREFIX + name
        if self.middleware.call_sync('zfs.snapshot.query', [['id', '=', f'{docker_config["dataset"]}@{snap_name}']]):
            raise CallError(f'{snap_name!r} snapshot already exists', errno=errno.EEXIST)

        if name in self.list_backups():
            raise CallError(f'Backup with {name!r} already exists', errno=errno.EEXIST)

        backup_base_dir = backup_ds_path()
        os.makedirs(backup_base_dir, exist_ok=True)
        backup_dir = os.path.join(backup_base_dir, name)
        os.makedirs(backup_dir)

        job.set_progress(10, 'Basic validation complete')

        try:
            shutil.copy(get_collective_metadata_path(), os.path.join(backup_dir, 'collective_metadata.yaml'))
            shutil.copy(get_collective_config_path(), os.path.join(backup_dir, 'collective_config.yaml'))

            # Record the state of every app so a restore knows which apps existed / were running
            with open(backup_apps_state_file_path(name), 'w') as f:
                yaml.safe_dump({app['name']: app for app in self.middleware.call_sync('app.query')}, f)

            with open(os.path.join(backup_dir, 'docker_config.yaml'), 'w') as f:
                yaml.safe_dump(docker_config, f)

            job.set_progress(95, 'Taking snapshot of ix-applications')

            # The snapshot is taken last so that it also captures the metadata files written above
            self.middleware.call_sync(
                'zettarepl.create_recursive_snapshot_with_exclude', docker_config['dataset'],
                snap_name, datasets_to_skip_for_snapshot_on_backup(docker_config['dataset'])
            )
        except Exception:
            # Do not leave a half-written backup directory behind: it would never be listed
            # (no matching snapshot) and would make a retry with the same name fail in makedirs
            shutil.rmtree(backup_dir, ignore_errors=True)
            raise

        job.set_progress(100, f'Backup {name!r} complete')

        return name

    @api_method(DockerListBackupArgs, DockerListBackupResult, roles=['DOCKER_READ'])
    def list_backups(self):
        """
        List existing app backups.

        Returns a dictionary keyed by backup name. Only backups which have both a snapshot and a
        readable apps state file are reported.
        """
        docker_config = self.middleware.call_sync('docker.config')
        if not docker_config['pool']:
            return {}

        backups_base_dir = backup_ds_path()
        backups = {}
        snapshots = self.middleware.call_sync(
            'zfs.snapshot.query', [
                ['name', '^', f'{docker_config["dataset"]}@{BACKUP_NAME_PREFIX}']
            ], {'select': ['name']}
        )
        for snapshot in snapshots:
            # <dataset>@<BACKUP_NAME_PREFIX><backup name> -> <backup name>
            backup_name = snapshot['name'].split('@', 1)[-1].removeprefix(BACKUP_NAME_PREFIX)
            backup_path = os.path.join(backups_base_dir, backup_name)
            if not os.path.exists(backup_path):
                continue

            try:
                with open(backup_apps_state_file_path(backup_name), 'r') as f:
                    apps = yaml.safe_load(f)
            except (FileNotFoundError, yaml.YAMLError):
                continue

            if not isinstance(apps, dict):
                # Empty or otherwise malformed state file, this is not a usable backup
                continue

            try:
                apps_info = [{k: app[k] for k in ('id', 'name', 'state')} for app in apps.values()]
            except (KeyError, TypeError):
                # An app entry is missing required keys or is not a mapping, skip this backup
                continue

            backups[backup_name] = {
                'name': backup_name,
                'apps': apps_info,
                'snapshot_name': snapshot['name'],
                'created_on': str(self.middleware.call_sync(
                    'zfs.snapshot.get_instance', snapshot['name']
                )['properties']['creation']['parsed']),
                'backup_path': backup_path,
            }

        return backups

    @api_method(DockerDeleteBackupArgs, DockerDeleteBackupResult, roles=['DOCKER_WRITE'])
    def delete_backup(self, backup_name):
        """
        Delete `backup_name` app backup.

        Both the backup's snapshot and its metadata directory are removed.
        """
        self.middleware.call_sync('docker.state.validate')

        backup = self.middleware.call_sync('docker.list_backups').get(backup_name)
        if not backup:
            raise CallError(f'Backup {backup_name!r} does not exist', errno=errno.ENOENT)

        self.middleware.call_sync('zfs.snapshot.delete', backup['snapshot_name'], {'recursive': True})
        # Best effort: failure to remove the metadata directory is deliberately ignored
        shutil.rmtree(backup['backup_path'], ignore_errors=True)


async def post_system_update_hook(middleware):
    """
    Take an automatic apps backup after a system update.

    Old auto-generated backups (names starting with `UPDATE_BACKUP_PREFIX`) are pruned, oldest
    first, until at most two remain, so that at most three exist once the new one is created.
    Failures are logged and never raised.
    """
    if not (await middleware.call('docker.config'))['dataset']:
        # If docker is not configured, there is nothing to backup
        logger.debug('Docker is not configured, skipping app\'s backup on system update')
        return

    backups = sorted(
        (
            v for k, v in (await middleware.call('docker.list_backups')).items()
            if k.startswith(UPDATE_BACKUP_PREFIX)
        ),
        key=lambda d: d['created_on'],
    )
    while len(backups) >= 3:
        backup = backups.pop(0)
        try:
            logger.debug('Deleting %r app\'s old auto-generated backup', backup['name'])
            await middleware.call('docker.delete_backup', backup['name'])
        except Exception as e:
            logger.error(
                'Failed to delete %r app backup: %s', backup['name'], e, exc_info=True
            )
            # Stop pruning on the first failure but still attempt to take the new backup
            break

    # The prefix already ends with a dash, so it is concatenated directly with the
    # timestamp (adding another separator would produce a double dash in the name)
    backup_job = await middleware.call(
        'docker.backup', UPDATE_BACKUP_PREFIX + datetime.now().strftime('%F_%T')
    )
    await backup_job.wait()
    if backup_job.error:
        logger.error('Failed to backup apps: %s', backup_job.error)


async def setup(middleware):
    """Register the post system update backup hook with the middleware."""
    hook_name = 'update.post_update'
    middleware.register_hook(hook_name, post_system_update_hook, sync=True)
84 changes: 84 additions & 0 deletions src/middlewared/middlewared/plugins/docker/restore_backup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import errno
import logging
import os

from middlewared.api import api_method
from middlewared.api.current import DockerRestoreBackupArgs, DockerRestoreBackupResult
from middlewared.plugins.apps.ix_apps.path import get_installed_app_path
from middlewared.plugins.apps.ix_apps.utils import AppState
from middlewared.service import CallError, job, Service


logger = logging.getLogger('app_lifecycle')


class DockerService(Service):
    # Restore-related method of the docker service.

    class Config:
        cli_namespace = 'app.docker'

    @api_method(DockerRestoreBackupArgs, DockerRestoreBackupResult, roles=['DOCKER_WRITE'])
    @job(lock='docker_restore_backup')
    def restore_backup(self, job, backup_name):
        """
        Restore a backup of existing apps.

        The docker service is stopped, the docker dataset is rolled back to the snapshot of
        `backup_name` and apps which were running when the backup was taken are redeployed.
        """
        backup = self.middleware.call_sync('docker.list_backups').get(backup_name)
        if not backup:
            raise CallError(f'Backup {backup_name!r} not found', errno=errno.ENOENT)

        job.set_progress(10, 'Basic validation complete')

        logger.debug('Restoring backup %r', backup_name)
        self.middleware.call_sync('service.stop', 'docker')
        job.set_progress(20, 'Stopped Docker service')

        docker_config = self.middleware.call_sync('docker.config')
        # NOTE(review): the `docker` child dataset is presumably not part of backup snapshots,
        # so it is removed here and set up again after the rollback - confirm
        self.middleware.call_sync(
            'zfs.dataset.delete', os.path.join(docker_config['dataset'], 'docker'), {'force': True}
        )

        job.set_progress(25, f'Rolling back to {backup_name!r} backup')
        self.middleware.call_sync(
            'zfs.snapshot.rollback', backup['snapshot_name'], {
                'force': True,
                'recursive': True,
                'recursive_clones': True,
                'recursive_rollback': True,
            }
        )

        job.set_progress(30, 'Rolled back snapshots')

        self.middleware.call_sync('docker.setup.create_update_docker_datasets', docker_config['dataset'])
        self.middleware.call_sync('docker.fs_manage.mount')

        # Only apps which still exist on disk after the rollback and which were running
        # at backup time are started again
        apps_to_start = []
        for app_info in backup['apps']:
            if not os.path.exists(get_installed_app_path(app_info['id'])):
                logger.debug('App %r path not found, skipping restoring', app_info['id'])
                continue

            if app_info['state'] == AppState.RUNNING.name:
                apps_to_start.append(app_info['id'])

        metadata_job = self.middleware.call_sync('app.metadata.generate')
        metadata_job.wait_sync()
        if metadata_job.error:
            raise CallError(f'Failed to generate app metadata: {metadata_job.error}')

        job.set_progress(50, 'Generated metadata for apps')

        self.middleware.call_sync('docker.state.start_service', True)
        job.set_progress(70, 'Started Docker service')

        if apps_to_start:
            logger.debug('Starting %r apps', ', '.join(apps_to_start))
            redeploy_job = self.middleware.call_sync(
                'core.bulk', 'app.redeploy', [
                    [app_name] for app_name in apps_to_start
                ]
            )
            redeploy_job.wait_sync()

        # Not going to raise an error if some app failed to start as that could be true for various apps
        logger.debug('Restore complete')
        job.set_progress(100, f'Restore {backup_name!r} complete')
14 changes: 14 additions & 0 deletions src/middlewared/middlewared/plugins/docker/state_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,20 @@ def catalog_ds_path() -> str:
return os.path.join(IX_APPS_MOUNT_PATH, CATALOG_DATASET_NAME)


def backup_apps_state_file_path(backup_name: str) -> str:
    """Return the path of the apps state file inside the `backup_name` backup directory."""
    # NOTE: despite the .json suffix, the docker backup plugin writes and reads
    # this file with yaml (safe_dump / safe_load).
    backup_dir = os.path.join(backup_ds_path(), backup_name)
    return os.path.join(backup_dir, 'apps_state.json')


def backup_ds_path() -> str:
    """Return the `backups` directory located under the ix-apps mount path."""
    backups_dir_name = 'backups'
    return os.path.join(IX_APPS_MOUNT_PATH, backups_dir_name)


def datasets_to_skip_for_snapshot_on_backup(docker_ds: str) -> list[str]:
    """Return the child datasets of `docker_ds` (catalog and docker) to exclude from backup snapshots."""
    excluded_children = (CATALOG_DATASET_NAME, 'docker')
    return [os.path.join(docker_ds, child) for child in excluded_children]


def docker_datasets(docker_ds: str) -> typing.List[str]:
return [docker_ds] + [
os.path.join(docker_ds, d) for d in (
Expand Down
4 changes: 4 additions & 0 deletions src/middlewared/middlewared/plugins/docker/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
import os


# Prefix of the ZFS snapshot backing an apps backup (snapshot name = prefix + backup name).
BACKUP_NAME_PREFIX = 'ix-apps-backup-'
# Prefix of the names of backups taken automatically after a system update.
# It already ends with a dash, so no extra separator is needed when appending to it.
UPDATE_BACKUP_PREFIX = 'system-update-'


def applications_ds_name(pool: str) -> str:
    """Return the name of the `ix-apps` dataset located directly under `pool`."""
    dataset_leaf = 'ix-apps'
    return os.path.join(pool, dataset_leaf)
Loading