diff --git a/actions.yaml b/actions.yaml index b35ddf1a..e5e31a42 100644 --- a/actions.yaml +++ b/actions.yaml @@ -36,3 +36,10 @@ create-backup: list-backups: description: List database backups. S3 credentials are retrieved from a relation with the S3 integrator charm. + +restore: + description: Restore a database backup. S3 credentials are retrieved from a relation with the S3 integrator charm. + params: + backup-id: + type: string + description: A backup-id to identify the backup to restore. Format of <%Y-%m-%dT%H:%M:%SZ> diff --git a/src/charm.py b/src/charm.py index 1f0d1420..4118555d 100755 --- a/src/charm.py +++ b/src/charm.py @@ -156,13 +156,18 @@ def _on_install(self, event: InstallEvent) -> None: if self.unit.is_leader(): self.state.cluster.update({"quorum": "default - non-ssl"}) - def _on_cluster_relation_changed(self, event: EventBase) -> None: + def _on_cluster_relation_changed(self, event: EventBase) -> None: # noqa: C901 """Generic handler for all 'something changed, update' events across all relations.""" # not all methods called if not self.state.peer_relation: self._set_status(Status.NO_PEER_RELATION) return + if self.state.cluster.is_restore_in_progress: + # Ongoing backup restore, we can early return here since the + # chain of events is only relevant to the backup event handler + return + # don't want to prematurely set config using outdated/missing relation data # also skip update-status overriding statues during upgrades if not self.upgrade_events.idle: @@ -176,7 +181,7 @@ def _on_cluster_relation_changed(self, event: EventBase) -> None: self.config_manager.set_etc_hosts() # don't run (and restart) if some units are still joining - # instead, wait for relation-changed from it's setting of 'started' + # instead, wait for relation-changed from its setting of 'started' # also don't run (and restart) if some units still need to set ip self._set_status(self.state.all_installed) if not isinstance(self.unit.status, ActiveStatus): @@ -345,6 +350,7 @@ def init_server(self): self.config_manager.set_zookeeper_properties() self.config_manager.set_jaas_config() + self.config_manager.set_client_jaas_config() # during reschedules (e.g upgrades or otherwise) we lose all files # need to manually add-back key/truststores @@ -424,6 +430,14 @@ def update_quorum(self, event: EventBase) -> None: self.update_client_data() + def disconnect_clients(self) -> None: + """Remove a necessary part of the client databag, acting as a logical disconnect.""" + if not self.unit.is_leader(): + return + + for client in self.state.clients: + client.update({"endpoints": ""}) + def update_client_data(self) -> None: """Writes necessary relation data to all related applications.""" if not self.unit.is_leader(): diff --git a/src/core/cluster.py b/src/core/cluster.py index 8b2b3c1a..a3f15282 100644 --- a/src/core/cluster.py +++ b/src/core/cluster.py @@ -361,6 +361,9 @@ def stable(self) -> Status: if self.stale_quorum: return Status.STALE_QUORUM + if self.cluster.is_restore_in_progress: + return Status.ONGOING_RESTORE + if not self.all_servers_added: return Status.NOT_ALL_ADDED @@ -379,3 +382,9 @@ def ready(self) -> Status: return Status.ALL_UNIFIED return self.stable + + @property + def is_next_restore_step_possible(self) -> bool: + """Are all units done with the current restore instruction?""" + current_instruction = self.cluster.restore_instruction + return all((unit.restore_progress is current_instruction for unit in self.servers)) diff --git a/src/core/models.py b/src/core/models.py index 945610f7..ed132c26 100644 --- a/src/core/models.py +++ b/src/core/models.py @@ -13,7 +13,7 @@ from ops.model import Application, Relation, Unit from typing_extensions import deprecated, override -from core.stubs import S3ConnectionInfo +from core.stubs import RestoreStep, S3ConnectionInfo from literals import CHARM_USERS, CLIENT_PORT, ELECTION_PORT, SECRETS_APP, SERVER_PORT logger = logging.getLogger(__name__) @@ -289,6 +289,21 @@ def s3_credentials(self) -> S3ConnectionInfo: # This is checked in events.backup actions return json.loads(self.relation_data.get("s3-credentials", "{}")) + @property + def id_to_restore(self) -> str: + """Backup id to restore.""" + return self.relation_data.get("id-to-restore", "") + + @property + def restore_instruction(self) -> RestoreStep: + """Current restore flow step to go through.""" + return RestoreStep(self.relation_data.get("restore-instruction", "")) + + @property + def is_restore_in_progress(self) -> bool: + """Is the cluster undergoing a restore?""" + return bool(self.id_to_restore) + class ZKServer(RelationState): """State collection metadata for a charm unit.""" @@ -440,3 +455,8 @@ def sans(self) -> dict[str, list[str]]: "sans_ip": [self.ip], "sans_dns": [self.hostname, self.fqdn], } + + @property + def restore_progress(self) -> RestoreStep: + """Latest restore flow step the unit went through.""" + return RestoreStep(self.relation_data.get("restore-progress", "")) diff --git a/src/core/stubs.py b/src/core/stubs.py index 93ca63e4..645e646c 100644 --- a/src/core/stubs.py +++ b/src/core/stubs.py @@ -3,6 +3,7 @@ # See LICENSE file for licensing details. """Types module.""" +from enum import Enum from typing import TypedDict S3ConnectionInfo = TypedDict( @@ -19,3 +20,27 @@ BackupMetadata = TypedDict("BackupMetadata", {"id": str, "log-sequence-number": int, "path": str}) + + +class RestoreStep(str, Enum): + """Represent restore flow step.""" + + NOT_STARTED = "" + STOP_WORKFLOW = "stop" + RESTORE = "restore" + RESTART = "restart" + CLEAN = "clean" + + def next_step(self) -> "RestoreStep": + """Get the next logical restore flow step.""" + match self: + case RestoreStep.NOT_STARTED: + return RestoreStep.STOP_WORKFLOW + case RestoreStep.STOP_WORKFLOW: + return RestoreStep.RESTORE + case RestoreStep.RESTORE: + return RestoreStep.RESTART + case RestoreStep.RESTART: + return RestoreStep.CLEAN + case RestoreStep.CLEAN: + return RestoreStep.NOT_STARTED diff --git a/src/core/workload.py b/src/core/workload.py index 149ac97a..6f3005f4 100644 --- a/src/core/workload.py +++ b/src/core/workload.py @@ -57,6 +57,14 @@ def jaas(self) -> str: """ return f"{self.conf_path}/zookeeper-jaas.cfg" + @property + def client_jaas(self) -> str: + """The client-jaas.cfg filepath. + + Contains internal user credentials used in SASL auth. + """ + return f"{self.conf_path}/client-jaas.cfg" + @property def jmx_prometheus_javaagent(self) -> str: """The JMX exporter JAR filepath. diff --git a/src/events/backup.py b/src/events/backup.py index ae9278be..eda55bc1 100644 --- a/src/events/backup.py +++ b/src/events/backup.py @@ -12,10 +12,14 @@ CredentialsGoneEvent, S3Requirer, ) -from ops import ActionEvent +from ops import ( + ActionEvent, + RelationEvent, +) from ops.framework import Object +from tenacity import retry, retry_if_result, stop_after_attempt, wait_fixed -from core.stubs import S3ConnectionInfo +from core.stubs import RestoreStep, S3ConnectionInfo from literals import S3_BACKUPS_PATH, S3_REL_NAME, Status from managers.backup import BackupManager @@ -41,7 +45,11 @@ def __init__(self, charm): self.framework.observe(self.charm.on.create_backup_action, self._on_create_backup_action) self.framework.observe(self.charm.on.list_backups_action, self._on_list_backups_action) - # self.framework.observe(self.charm.on.restore_action, self._on_restore_action) + self.framework.observe(self.charm.on.restore_action, self._on_restore_action) + + self.framework.observe( + getattr(self.charm.on, "cluster_relation_changed"), self._restore_event_dispatch + ) def _on_s3_credentials_changed(self, event: CredentialsChangedEvent): if not self.charm.unit.is_leader(): @@ -139,6 +147,127 @@ def _on_list_backups_action(self, event: ActionEvent): event.log(output) event.set_results({"backups": json.dumps(backups_metadata)}) - def _on_restore_action(self, _): - # TODO - pass + def _on_restore_action(self, event: ActionEvent): + """Restore a snapshot referenced by its id. + + Steps: + - stop client traffic + - stop all units + - backup local state so that we can rollback if anything goes wrong (manual op) + - wipe data folders + - get snapshot from object storage, save in data folder + - restart units + - cleanup leftover files + - notify clients + """ + id_to_restore = event.params.get("backup-id", "") + failure_conditions = [ + ( + lambda: not self.charm.unit.is_leader(), + "Action must be ran on the application leader", + ), + ( + lambda: not self.charm.state.cluster.s3_credentials, + "Cluster needs an access to an object storage to make a backup", + ), + ( + lambda: not id_to_restore, + "No backup id to restore provided", + ), + ( + lambda: not self.backup_manager.is_snapshot_in_bucket(id_to_restore), + "Backup id not found in storage object", + ), + ( + lambda: bool(self.charm.state.cluster.is_restore_in_progress), + "A snapshot restore is currently ongoing", + ), + ] + + for check, msg in failure_conditions: + if check(): + logging.error(msg) + event.set_results({"error": msg}) + event.fail(msg) + return + + self.charm.state.cluster.update( + { + "id-to-restore": id_to_restore, + "restore-instruction": RestoreStep.NOT_STARTED.value, + } + ) + self.charm.disconnect_clients() + + event.log(f"Beginning restore flow for snapshot {id_to_restore}") + + def _restore_event_dispatch(self, event: RelationEvent): + """Dispatch restore event to the proper method.""" + if not self.charm.state.cluster.is_restore_in_progress: + if self.charm.state.unit_server.restore_progress is not RestoreStep.NOT_STARTED: + self.charm.state.unit_server.update( + {"restore-progress": RestoreStep.NOT_STARTED.value} + ) + self.charm._set_status(self.charm.state.ready) + return + + if self.charm.unit.is_leader(): + self._maybe_progress_step() + + match self.charm.state.cluster.restore_instruction, self.charm.state.unit_server.restore_progress: + case RestoreStep.STOP_WORKFLOW, RestoreStep.NOT_STARTED: + self._stop_workflow() + case RestoreStep.RESTORE, RestoreStep.STOP_WORKFLOW: + self._download_and_restore() + case RestoreStep.RESTART, RestoreStep.RESTORE: + self._restart_workflow() + case RestoreStep.CLEAN, RestoreStep.RESTART: + self._cleaning() + case _: + pass + + def _maybe_progress_step(self): + """Check that all units are done with the current instruction and move to the next if applicable.""" + current_instruction = self.charm.state.cluster.restore_instruction + next_instruction = current_instruction.next_step() + + if self.charm.state.is_next_restore_step_possible: + payload = {"restore-instruction": next_instruction.value} + if current_instruction is RestoreStep.CLEAN: + payload = payload | {"id-to-restore": "", "to_restore": ""} + # Update ACLs for already related clients and trigger a relation-changed + # on their side to enable them to reconnect. + self.charm.update_client_data() + self.charm.quorum_manager.update_acls() + + self.charm.state.cluster.update(payload) + + def _stop_workflow(self) -> None: + self.charm._set_status(Status.ONGOING_RESTORE) + logger.info("Restoring - stopping workflow") + self.charm.workload.stop() + self.charm.state.unit_server.update({"restore-progress": RestoreStep.STOP_WORKFLOW.value}) + + def _download_and_restore(self) -> None: + logger.info("Restoring - restore snapshot") + self.backup_manager.restore_snapshot( + self.charm.state.cluster.id_to_restore, self.charm.workload + ) + self.charm.state.unit_server.update({"restore-progress": RestoreStep.RESTORE.value}) + + def _restart_workflow(self) -> None: + logger.info("Restoring - restarting workflow") + self.charm.workload.restart() + self.charm.state.unit_server.update({"restore-progress": RestoreStep.RESTART.value}) + + @retry( + wait=wait_fixed(5), + stop=stop_after_attempt(3), + retry=retry_if_result(lambda res: res is False), + ) + def _cleaning(self) -> bool | None: + if not self.charm.workload.healthy: + return False + logger.info("Restoring - cleaning files") + self.backup_manager.cleanup_leftover_files(self.charm.workload) + self.charm.state.unit_server.update({"restore-progress": RestoreStep.CLEAN.value}) diff --git a/src/events/provider.py b/src/events/provider.py index 788dfe5a..06726424 100644 --- a/src/events/provider.py +++ b/src/events/provider.py @@ -49,7 +49,7 @@ def _on_client_relation_updated(self, event: RelationEvent) -> None: Future `client_relation_changed` events called on non-leader units checks passwords before restarting. """ - if not self.charm.unit.is_leader(): + if not self.charm.unit.is_leader() or self.charm.state.cluster.is_restore_in_progress: return if not self.charm.state.stable: diff --git a/src/literals.py b/src/literals.py index d78a4a6c..f078f674 100644 --- a/src/literals.py +++ b/src/literals.py @@ -106,6 +106,7 @@ class Status(Enum): BlockedStatus("invalid s3 configuration - missing mandatory parameters"), "ERROR" ) BUCKET_NOT_CREATED = StatusLevel(BlockedStatus("cannot create s3 bucket"), "ERROR") + ONGOING_RESTORE = StatusLevel(MaintenanceStatus("restoring backup"), "INFO") SECRETS_APP = ["sync-password", "super-password", "s3-credentials"] diff --git a/src/managers/backup.py b/src/managers/backup.py index 0ce5e417..75115255 100644 --- a/src/managers/backup.py +++ b/src/managers/backup.py @@ -10,11 +10,12 @@ from io import BytesIO, StringIO from itertools import islice from operator import attrgetter +from pathlib import Path import boto3 import httpx import yaml -from botocore import loaders, regions +from botocore import config, loaders, regions from botocore.exceptions import ClientError from mypy_boto3_s3.service_resource import Bucket from rich.console import Console @@ -22,7 +23,8 @@ from core.cluster import ClusterState from core.stubs import BackupMetadata, S3ConnectionInfo -from literals import ADMIN_SERVER_PORT, S3_BACKUPS_LIMIT, S3_BACKUPS_PATH +from literals import ADMIN_SERVER_PORT, PATHS, S3_BACKUPS_LIMIT, S3_BACKUPS_PATH, USER +from workload import ZKWorkload logger = logging.getLogger(__name__) @@ -45,6 +47,7 @@ def bucket(self) -> Bucket: aws_secret_access_key=s3_parameters["secret-key"], region_name=s3_parameters["region"] if s3_parameters["region"] else None, endpoint_url=self._construct_endpoint(s3_parameters), + config=config.Config(retries={"mode": "standard", "max_attempts": 3}), ) return s3.Bucket(s3_parameters["bucket"]) @@ -176,6 +179,50 @@ def format_backups_table( return out_f.getvalue() + def is_snapshot_in_bucket(self, backup_id: str) -> bool: + """Check whether the requested snapshot to restore is in the object storage.""" + try: + content = self.bucket.meta.client.head_object( + Bucket=self.bucket.name, Key=os.path.join(self.backups_path, backup_id, "snapshot") + ) + except ClientError as ex: + if "(404)" in ex.args[0]: + return False + raise + return bool(content.get("ResponseMetadata", None)) + + def restore_snapshot(self, backup_id: str, workload: ZKWorkload) -> None: + """Download and restore a snapshot. + + Restoring requires removing the previous files on disk. In this step, we move them to a backup folder, + so that we can manually restore them if something goes wrong. + """ + data_dir = Path(PATHS["DATA"]) / "data" / "version-2" + data_log_dir = Path(PATHS["DATA"]) / "data-log" / "version-2" + + workload.exec(["bash", "-c", f"cp -rp {data_dir} {data_dir.parent / 'version-2.bak'}"]) + workload.exec( + ["bash", "-c", f"cp -rp {data_log_dir} {data_log_dir.parent / 'version-2.bak'}"] + ) + + workload.exec(["bash", "-c", f"rm -rf {data_dir / '*'}"]) + workload.exec(["bash", "-c", f"rm -rf {data_log_dir / '*'}"]) + + restored_snapshot = data_dir / "snapshot.0" + self.bucket.Object(os.path.join(self.backups_path, backup_id, "snapshot")).download_file( + f"{restored_snapshot}" + ) + + workload.exec(["bash", "-c", f"chown {USER}:{USER} {restored_snapshot}"]) + + def cleanup_leftover_files(self, workload: ZKWorkload) -> None: + """Cleanup the files previously stored in the data and data-log directories.""" + data_dir = Path(PATHS["DATA"]) / "data" / "version-2.bak" + data_log_dir = Path(PATHS["DATA"]) / "data-log" / "version-2.bak" + + workload.exec(["bash", "-c", f"rm -rf {data_dir}"]) + workload.exec(["bash", "-c", f"rm -rf {data_log_dir}"]) + class _StreamingToFileSyncAdapter: """Wrapper to make httpx.stream behave like a file-like object. diff --git a/src/managers/config.py b/src/managers/config.py index 24813ba2..d654fab1 100644 --- a/src/managers/config.py +++ b/src/managers/config.py @@ -4,6 +4,7 @@ """Manager for for handling configuration building + writing.""" import logging +from textwrap import dedent from ops.model import ConfigData @@ -138,7 +139,8 @@ def jaas_config(self) -> str: """ users = "\n".join(self.jaas_users) or "" - return f""" + return dedent( + f""" QuorumServer {{ org.apache.zookeeper.server.auth.DigestLoginModule required user_sync="{self.state.cluster.internal_user_credentials.get('sync', '')}"; @@ -156,6 +158,20 @@ def jaas_config(self) -> str: user_super="{self.state.cluster.internal_user_credentials.get('super', '')}"; }}; """ + ) + + @property + def client_jaas_config(self) -> str: + """Build the client JAAS config.""" + return dedent( + f""" + Client {{ + org.apache.zookeeper.server.auth.DigestLoginModule required + username="super" + password="{self.state.cluster.internal_user_credentials.get('super', '')}"; + }}; + """ + ) @property def zookeeper_properties(self) -> list[str]: @@ -309,6 +325,10 @@ def set_jaas_config(self) -> None: """Sets the ZooKeeper JAAS config.""" self.workload.write(content=self.jaas_config, path=self.workload.paths.jaas) + def set_client_jaas_config(self) -> None: + """Sets the ZooKeeper client JAAS config.""" + self.workload.write(content=self.client_jaas_config, path=self.workload.paths.client_jaas) + def set_server_jvmflags(self) -> None: """Sets the env-vars needed for SASL auth to /etc/environment on the unit.""" self._update_environment( @@ -388,6 +408,7 @@ def config_changed(self) -> bool: ) ) self.set_jaas_config() + self.set_client_jaas_config() if log_level_changed: logger.info( diff --git a/src/managers/quorum.py b/src/managers/quorum.py index 06668a89..356db5c3 100644 --- a/src/managers/quorum.py +++ b/src/managers/quorum.py @@ -228,8 +228,17 @@ def update_acls(self, event: RelationEvent | None = None) -> None: logger.debug(f"UPDATE CHROOT - {client.database}") self.client.set_acls_znode_leader(path=client.database, acls=[generated_acl]) + subnodes = self.client.leader_znodes(path=client.database) + for node in subnodes: + self.client.set_acls_znode_leader(path=node, acls=[generated_acl]) + # Looks for applications no longer in the relation but still in config + restricted_acl = make_acl( + scheme="sasl", + credential="super", + all=True, + ) for chroot in sorted(leader_chroots - requested_chroots, reverse=True): if not self._is_child_of(chroot, requested_chroots): - logger.info(f"DROP CHROOT - {chroot}") - self.client.delete_znode_leader(path=chroot) + logger.info(f"RESET ACLS CHROOT - {chroot}") + self.client.set_acls_znode_leader(path=chroot, acls=[restricted_acl]) diff --git a/tests/integration/test_backup.py b/tests/integration/test_backup.py index 4d6244bc..758334d6 100644 --- a/tests/integration/test_backup.py +++ b/tests/integration/test_backup.py @@ -13,12 +13,13 @@ from mypy_boto3_s3.service_resource import Bucket from pytest_operator.plugin import OpsTest -from .helpers import APP_NAME +from .helpers import APP_NAME, check_key, get_address, get_user_password, write_key logger = logging.getLogger(__name__) S3_INTEGRATOR = "s3-integrator" S3_CHANNEL = "latest/stable" +APP_TO_RESTORE = "restored" @pytest.fixture(scope="session") @@ -107,6 +108,10 @@ async def test_create_backup(ops_test: OpsTest, s3_bucket: Bucket): if await unit.is_leader_from_status(): leader_unit = unit + super_password = await get_user_password(ops_test, "super") + host = await get_address(ops_test, APP_NAME, leader_unit.name.split("/")[-1]) + write_key(host=host, password=super_password) + create_action = await leader_unit.run_action("create-backup") await create_action.wait() @@ -115,3 +120,41 @@ async def test_create_backup(ops_test: OpsTest, s3_bucket: Bucket): backups = json.loads(response.results.get("backups", "[]")) assert len(backups) == 1 + + +@pytest.mark.abort_on_fail +async def test_restore_backup_new_app(ops_test: OpsTest, s3_bucket: Bucket, zk_charm): + + await ops_test.model.deploy( + zk_charm, + application_name=APP_TO_RESTORE, + num_units=3, + ) + await ops_test.model.wait_for_idle( + apps=[APP_TO_RESTORE], + status="active", + timeout=1000, + ) + await ops_test.model.add_relation(APP_TO_RESTORE, S3_INTEGRATOR) + await ops_test.model.wait_for_idle( + apps=[APP_TO_RESTORE, S3_INTEGRATOR], status="active", timeout=1000, raise_on_error=False + ) + for unit in ops_test.model.applications[APP_TO_RESTORE].units: + if await unit.is_leader_from_status(): + leader_unit = unit + + list_action = await leader_unit.run_action("list-backups") + response = await list_action.wait() + + backups = json.loads(response.results.get("backups", "[]")) + backup_to_restore = backups[0]["id"] + list_action = await leader_unit.run_action("restore", **{"backup-id": backup_to_restore}) + + await ops_test.model.wait_for_idle( + apps=[APP_TO_RESTORE, S3_INTEGRATOR], status="active", timeout=1000, idle_period=30 + ) + super_password = await get_user_password(ops_test, "super") + + for unit in ops_test.model.applications[APP_TO_RESTORE].units: + host = await get_address(ops_test, APP_NAME, unit.name.split("/")[-1]) + check_key(host=host, password=super_password) diff --git a/tests/unit/scenario/test_backup.py b/tests/unit/scenario/test_backup.py index 7fb69865..59d25e22 100644 --- a/tests/unit/scenario/test_backup.py +++ b/tests/unit/scenario/test_backup.py @@ -231,3 +231,96 @@ def test_action_list_backups_no_creds(ctx: Context, base_state: State): assert ( exc_info.value.message == "Cluster needs an access to an object storage to make a backup" ) + + +def test_action_restore_not_leader(ctx: Context, base_state: State): + # Given + state_in = dataclasses.replace(base_state, leader=False) + + # When + # Then + with pytest.raises(ActionFailed) as exc_info: + _ = ctx.run(ctx.on.action("restore"), state_in) + + assert exc_info.value.message == "Action must be ran on the application leader" + + +def test_action_restore_no_creds(ctx: Context, base_state: State): + # Given + state_in = base_state + + # When + # Then + with pytest.raises(ActionFailed) as exc_info: + _ = ctx.run(ctx.on.action("restore"), state_in) + + assert ( + exc_info.value.message == "Cluster needs an access to an object storage to make a backup" + ) + + +def test_action_restore_no_id_param(ctx: Context, base_state: State): + # Given + s3_params = { + "access-key": "speakfriend", + "secret-key": "mellon", + "bucket": "moria", + "region": "", + } + state_in = base_state + + # When + # Then + with ( + pytest.raises(ActionFailed) as exc_info, + patch("core.models.ZKCluster.s3_credentials", new_callable=PropertyMock, value=s3_params), + patch("managers.backup.BackupManager.is_snapshot_in_bucket", side_effect=[False]), + ): + _ = ctx.run(ctx.on.action("restore"), state_in) + + assert exc_info.value.message == "No backup id to restore provided" + + +def test_action_restore_snapshot_not_found(ctx: Context, base_state: State): + # Given + s3_params = { + "access-key": "speakfriend", + "secret-key": "mellon", + "bucket": "moria", + "region": "", + } + state_in = base_state + + # When + # Then + with ( + pytest.raises(ActionFailed) as exc_info, + patch("core.models.ZKCluster.s3_credentials", new_callable=PropertyMock, value=s3_params), + patch("managers.backup.BackupManager.is_snapshot_in_bucket", side_effect=[False]), + ): + _ = ctx.run(ctx.on.action("restore", params={"backup-id": "notfound"}), state_in) + + assert exc_info.value.message == "Backup id not found in storage object" + + +def test_action_restore_ongoing_restore(ctx: Context, base_state: State): + # Given + s3_params = { + "access-key": "speakfriend", + "secret-key": "mellon", + "bucket": "moria", + "region": "", + } + restore_peer = PeerRelation(PEER, PEER, local_app_data={"id-to-restore": "ongoing-backup-id"}) + state_in = dataclasses.replace(base_state, relations=[restore_peer]) + + # When + # Then + with ( + pytest.raises(ActionFailed) as exc_info, + patch("core.models.ZKCluster.s3_credentials", new_callable=PropertyMock, value=s3_params), + patch("managers.backup.BackupManager.is_snapshot_in_bucket", side_effect=[True]), + ): + _ = ctx.run(ctx.on.action("restore", params={"backup-id": "new-backup-id"}), state_in) + + assert exc_info.value.message == "A snapshot restore is currently ongoing" diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 8c25dc70..4fc182f7 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -501,6 +501,7 @@ def test_init_server_sets_blocked_if_not_alive(harness): patch("managers.config.ConfigManager.set_zookeeper_dynamic_properties"), patch("managers.config.ConfigManager.set_zookeeper_properties"), patch("managers.config.ConfigManager.set_jaas_config"), + patch("managers.config.ConfigManager.set_client_jaas_config"), patch("workload.ZKWorkload.start"), patch("workload.ZKWorkload.alive", new_callable=PropertyMock, return_value=False), ): @@ -543,6 +544,9 @@ def test_init_server_calls_necessary_methods(harness): ) as zookeeper_dynamic_properties, patch("managers.config.ConfigManager.set_zookeeper_properties") as zookeeper_properties, patch("managers.config.ConfigManager.set_jaas_config") as zookeeper_jaas_config, + patch( + "managers.config.ConfigManager.set_client_jaas_config" + ) as zookeeper_client_jaas_config, patch("managers.tls.TLSManager.set_private_key") as patched_private_key, patch("managers.tls.TLSManager.set_ca") as patched_ca, patch("managers.tls.TLSManager.set_certificate") as patched_certificate, @@ -557,6 +561,7 @@ def test_init_server_calls_necessary_methods(harness): zookeeper_dynamic_properties.assert_called_once() zookeeper_properties.assert_called_once() zookeeper_jaas_config.assert_called_once() + zookeeper_client_jaas_config.assert_called_once() patched_private_key.assert_called_once() patched_ca.assert_called_once() patched_certificate.assert_called_once() diff --git a/tests/unit/test_config.py b/tests/unit/test_config.py index 059c3e7a..785f6dcd 100644 --- a/tests/unit/test_config.py +++ b/tests/unit/test_config.py @@ -207,6 +207,7 @@ def test_config_changed_updates_properties_jaas_hosts(harness): ), patch("managers.config.ConfigManager.static_properties", return_value="gandalf=grey"), patch("managers.config.ConfigManager.set_jaas_config"), + patch("managers.config.ConfigManager.set_client_jaas_config"), patch("managers.config.ConfigManager.set_zookeeper_properties") as set_props, patch("managers.config.ConfigManager.set_server_jvmflags"), ): @@ -218,10 +219,12 @@ def test_config_changed_updates_properties_jaas_hosts(harness): patch("workload.ZKWorkload.read", return_value=["gandalf=grey"]), patch("managers.config.ConfigManager.set_zookeeper_properties"), patch("managers.config.ConfigManager.set_jaas_config") as set_jaas, + patch("managers.config.ConfigManager.set_client_jaas_config") as set_client_jaas, patch("managers.config.ConfigManager.set_server_jvmflags"), ): harness.charm.config_manager.config_changed() set_jaas.assert_called_once() + set_client_jaas.assert_called_once() def test_update_environment(harness): diff --git a/tests/unit/test_quorum.py b/tests/unit/test_quorum.py index 04408f05..2ef240e4 100644 --- a/tests/unit/test_quorum.py +++ b/tests/unit/test_quorum.py @@ -113,20 +113,12 @@ def test_update_acls_correctly_handles_relation_chroots(harness): for _, kwargs in patched_manager["create_znode_leader"].call_args_list: assert "/rohan" in kwargs["path"] - for _, kwargs in patched_manager["set_acls_znode_leader"].call_args_list: - assert "/rohan" in kwargs["path"] - - removed_men = False - for counter, call in enumerate(patched_manager["delete_znode_leader"].call_args_list): - _, kwargs = call + _, kwargs = patched_manager["set_acls_znode_leader"].call_args_list[0] + assert "/rohan" in kwargs["path"] - if "/fellowship/men" in kwargs["path"]: - assert not removed_men, "Parent zNode removed before all it's children" + paths_updated = { + kwargs["path"] for _, kwargs in patched_manager["set_acls_znode_leader"].call_args_list + } - if kwargs["path"] == "/fellowship/men": - removed_men = True - - # ensure last node to go is the parent - assert ( - patched_manager["delete_znode_leader"].call_args_list[-1][1]["path"] == "/fellowship" - ) + # all paths saw their acls updated + assert not dummy_leader_znodes - paths_updated diff --git a/tests/unit/test_upgrade.py b/tests/unit/test_upgrade.py index 32ef4b39..c8b13d85 100644 --- a/tests/unit/test_upgrade.py +++ b/tests/unit/test_upgrade.py @@ -174,6 +174,7 @@ def test_run_password_rotation_while_upgrading(harness, mocker, upgrade_stack): mocker.patch.object(ConfigManager, "set_zookeeper_dynamic_properties"), mocker.patch.object(ConfigManager, "set_zookeeper_properties"), mocker.patch.object(ConfigManager, "set_jaas_config"), + mocker.patch.object(ConfigManager, "set_client_jaas_config"), patch("charm.ZooKeeperCharm.update_quorum"), ): harness.charm.password_action_events._set_password_action(mock_event)