From bdabc23393f6c1f2be4ea6e1ea5bfd4d9cdec331 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Wed, 3 Jul 2024 11:28:55 -0300 Subject: [PATCH 01/11] Fix GCP backup test Signed-off-by: Marcelo Henrique Neppel --- tests/integration/test_backups.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_backups.py b/tests/integration/test_backups.py index b813001cbd..adbf462105 100644 --- a/tests/integration/test_backups.py +++ b/tests/integration/test_backups.py @@ -287,7 +287,7 @@ async def test_restore_on_new_cluster(ops_test: OpsTest, github_secrets, charm) async with ops_test.fast_forward(): unit = ops_test.model.units.get(f"{database_app_name}/0") await ops_test.model.block_until( - lambda: unit.workload_status_message == ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE + lambda: unit.workload_status_message == MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET ) # Check that the backup was correctly restored by having only the first created table. From cd8fdda04d256641436867400f7cd0f9b1618fdc Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 5 Jul 2024 11:45:51 -0300 Subject: [PATCH 02/11] Reset restore flag Signed-off-by: Marcelo Henrique Neppel --- src/charm.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/charm.py b/src/charm.py index 8a3e3ab41a..362b0931e0 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1411,6 +1411,11 @@ def _set_primary_status_message(self) -> None: try: if "require-change-bucket-after-restore" in self.app_peer_data: self.unit.status = BlockedStatus(MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET) + self.app_peer_data.update({ + "restoring-backup": "", + "restore-stanza": "", + "restore-to-time": "", + }) return if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name: self.unit.status = ActiveStatus("Primary") From 0a0f45da6343c5c53f28b7bdd1dd6b4a0b6673bd Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 5 Jul 2024 14:00:39 -0300 Subject: [PATCH 03/11] Correctly access application peer data Signed-off-by: Marcelo Henrique Neppel --- src/backups.py | 10 ++++++++-- src/charm.py | 11 ++++++----- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/backups.py b/src/backups.py index 217c17b8aa..58eeac0fc5 100644 --- a/src/backups.py +++ b/src/backups.py @@ -219,6 +219,11 @@ def _is_s3_wal_compatible(self, stanza) -> Tuple[bool, Optional[str]]: and charm_last_archived_wal.split(".", 1)[0] != str(s3_last_archived_wal) ): if bool(self.charm.app_peer_data.get("require-change-bucket-after-restore", None)): + self.charm.app_peer_data.update({ + "restoring-backup": "", + "restore-stanza": "", + "restore-to-time": "", + }) return False, MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET else: return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE @@ -594,12 +599,13 @@ def _on_s3_credential_changed(self, event: CredentialsChangedEvent): event.defer() return + if self.charm.unit.is_leader(): + self.charm.app_peer_data.pop("require-change-bucket-after-restore", None) + # Verify the s3 relation only on the primary. if not self.charm.is_primary: return - self.charm.app_peer_data.pop("require-change-bucket-after-restore", None) - try: self._create_bucket_if_not_exists() except (ClientError, ValueError): diff --git a/src/charm.py b/src/charm.py index 362b0931e0..05d8729bb4 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1410,12 +1410,13 @@ def _set_primary_status_message(self) -> None: """Display 'Primary' in the unit status message if the current unit is the primary.""" try: if "require-change-bucket-after-restore" in self.app_peer_data: + if self.charm.unit.is_leader(): + self.app_peer_data.update({ + "restoring-backup": "", + "restore-stanza": "", + "restore-to-time": "", + }) self.unit.status = BlockedStatus(MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET) - self.app_peer_data.update({ - "restoring-backup": "", - "restore-stanza": "", - "restore-to-time": "", - }) return if self._patroni.get_primary(unit_name_pattern=True) == self.unit.name: self.unit.status = ActiveStatus("Primary") From 0e082d02eb8ba5ba5cd45f6f3de205c6ba9c5907 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 5 Jul 2024 14:10:58 -0300 Subject: [PATCH 04/11] Remove unnecessary code Signed-off-by: Marcelo Henrique Neppel --- src/backups.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/backups.py b/src/backups.py index 58eeac0fc5..c2e7be4dd9 100644 --- a/src/backups.py +++ b/src/backups.py @@ -219,11 +219,6 @@ def _is_s3_wal_compatible(self, stanza) -> Tuple[bool, Optional[str]]: and charm_last_archived_wal.split(".", 1)[0] != str(s3_last_archived_wal) ): if bool(self.charm.app_peer_data.get("require-change-bucket-after-restore", None)): - self.charm.app_peer_data.update({ - "restoring-backup": "", - "restore-stanza": "", - "restore-to-time": "", - }) return False, MOVE_RESTORED_CLUSTER_TO_ANOTHER_BUCKET else: return False, ANOTHER_CLUSTER_REPOSITORY_ERROR_MESSAGE From 012da5223f71a38579709ecca48f55211b82cab7 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 5 Jul 2024 14:35:33 -0300 Subject: [PATCH 05/11] Fix leader check Signed-off-by: Marcelo Henrique Neppel --- src/charm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/charm.py b/src/charm.py index 05d8729bb4..351609aad4 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1410,7 +1410,7 @@ def _set_primary_status_message(self) -> None: """Display 'Primary' in the unit status message if the current unit is the primary.""" try: if "require-change-bucket-after-restore" in self.app_peer_data: - if self.charm.unit.is_leader(): + if self.unit.is_leader(): self.app_peer_data.update({ "restoring-backup": "", "restore-stanza": "", From 2e0dbeb7122acfbf54470a408f5320758c514b40 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 5 Jul 2024 15:16:29 -0300 Subject: [PATCH 06/11] Update library Signed-off-by: Marcelo Henrique Neppel --- lib/charms/postgresql_k8s/v0/postgresql.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/charms/postgresql_k8s/v0/postgresql.py b/lib/charms/postgresql_k8s/v0/postgresql.py index f8f3ad2b23..071643841f 100644 --- a/lib/charms/postgresql_k8s/v0/postgresql.py +++ b/lib/charms/postgresql_k8s/v0/postgresql.py @@ -79,6 +79,10 @@ class PostgreSQLEnableDisableExtensionError(Exception): """Exception raised when enabling/disabling an extension fails.""" +class PostgreSQLGetLastArchivedWALError(Exception): + """Exception raised when retrieving last archived WAL fails.""" + + class PostgreSQLGetPostgreSQLVersionError(Exception): """Exception raised when retrieving PostgreSQL version fails.""" @@ -391,7 +395,7 @@ def get_last_archived_wal(self) -> str: return cursor.fetchone()[0] except psycopg2.Error as e: logger.error(f"Failed to get PostgreSQL last archived WAL: {e}") - raise PostgreSQLGetPostgreSQLVersionError() + raise PostgreSQLGetLastArchivedWALError() def get_postgresql_text_search_configs(self) -> Set[str]: """Returns the PostgreSQL available text search configs. From c61c9e81bd0be4975b47f192d4be7c3d9ef581a6 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Thu, 11 Jul 2024 10:15:27 -0300 Subject: [PATCH 07/11] Handle services restart on reboot Signed-off-by: Marcelo Henrique Neppel --- src/charm.py | 8 ++++++++ src/constants.py | 5 ++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/charm.py b/src/charm.py index 351609aad4..04e99c453b 100755 --- a/src/charm.py +++ b/src/charm.py @@ -1091,10 +1091,18 @@ def _on_start(self, event: StartEvent) -> None: # On replicas, only prepare for starting the instance later. if not self.unit.is_leader(): self._start_replica(event) + self._restart_services_after_reboot() return # Bootstrap the cluster in the leader unit. self._start_primary(event) + self._restart_services_after_reboot() + + def _restart_services_after_reboot(self): + """Restart the Patroni and pgBackRest after a reboot.""" + if self._unit_ip in self.members_ips: + self._patroni.start_patroni() + self.backup.start_stop_pgbackrest_service() def _setup_exporter(self) -> None: """Set up postgresql_exporter options.""" diff --git a/src/constants.py b/src/constants.py index 189ec73d54..81e561efa5 100644 --- a/src/constants.py +++ b/src/constants.py @@ -38,7 +38,10 @@ SNAP_PACKAGES = [ ( POSTGRESQL_SNAP_NAME, - {"revision": {"aarch64": "114", "x86_64": "115"}, "channel": "14/stable"}, + { + "revision": {"aarch64": "117", "x86_64": "116"}, + "channel": "14/edge/disable-pgbackrest-service", + }, ) ] From f988a92d5485b6bf958e641f56d285fa681de0a2 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Thu, 11 Jul 2024 16:57:44 -0300 Subject: [PATCH 08/11] Update unit tests Signed-off-by: Marcelo Henrique Neppel --- tests/unit/test_charm.py | 139 ++++++++++++++++++++++++++------------- 1 file changed, 95 insertions(+), 44 deletions(-) diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index ee2a254fed..6195a639e9 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -1,6 +1,7 @@ # Copyright 2021 Canonical Ltd. # See LICENSE file for licensing details. import itertools +import json import logging import platform import subprocess @@ -558,6 +559,9 @@ def test_enable_disable_extensions(harness, caplog): @patch_network_get(private_address="1.1.1.1") def test_on_start(harness): with ( + patch( + "charm.PostgresqlOperatorCharm._restart_services_after_reboot" + ) as _restart_services_after_reboot, patch( "charm.PostgresqlOperatorCharm._set_primary_status_message" ) as _set_primary_status_message, @@ -617,26 +621,31 @@ def test_on_start(harness): harness.charm.on.start.emit() _bootstrap_cluster.assert_called_once() _oversee_users.assert_not_called() + _restart_services_after_reboot.assert_called_once() assert isinstance(harness.model.unit.status, BlockedStatus) # Set an initial waiting status (like after the install hook was triggered). harness.model.unit.status = WaitingStatus("fake message") # Test the event of an error happening when trying to create the default postgres user. + _restart_services_after_reboot.reset_mock() _member_started.return_value = True harness.charm.on.start.emit() _postgresql.create_user.assert_called_once() _oversee_users.assert_not_called() + _restart_services_after_reboot.assert_called_once() assert isinstance(harness.model.unit.status, BlockedStatus) # Set an initial waiting status again (like after the install hook was triggered). harness.model.unit.status = WaitingStatus("fake message") # Then test the event of a correct cluster bootstrapping. + _restart_services_after_reboot.reset_mock() harness.charm.on.start.emit() assert _postgresql.create_user.call_count == 4 # Considering the previous failed call. _oversee_users.assert_called_once() _enable_disable_extensions.assert_called_once() _set_primary_status_message.assert_called_once() + _restart_services_after_reboot.assert_called_once() @patch_network_get(private_address="1.1.1.1") @@ -644,6 +653,9 @@ def test_on_start_replica(harness): with ( patch("charm.snap.SnapCache") as _snap_cache, patch("charm.Patroni.get_postgresql_version") as _get_postgresql_version, + patch( + "charm.PostgresqlOperatorCharm._restart_services_after_reboot" + ) as _restart_services_after_reboot, patch("charm.Patroni.configure_patroni_on_unit") as _configure_patroni_on_unit, patch( "charm.Patroni.member_started", @@ -674,15 +686,18 @@ def test_on_start_replica(harness): harness.charm._peers.data[harness.charm.app].update({"cluster_initialised": ""}) harness.charm.on.start.emit() _defer.assert_called_once() + _restart_services_after_reboot.assert_called_once() # Set an initial waiting status again (like after a machine restart). harness.model.unit.status = WaitingStatus("fake message") # Mark the cluster as initialised and with the workload up and running. + _restart_services_after_reboot.reset_mock() harness.charm._peers.data[harness.charm.app].update({"cluster_initialised": "True"}) _member_started.return_value = True harness.charm.on.start.emit() _configure_patroni_on_unit.assert_not_called() + _restart_services_after_reboot.assert_called_once() assert isinstance(harness.model.unit.status, ActiveStatus) # Set an initial waiting status (like after the install hook was triggered). @@ -690,9 +705,11 @@ def test_on_start_replica(harness): # Check that the unit status doesn't change when the workload is not running. # In that situation only Patroni is configured in the unit (but not started). + _restart_services_after_reboot.reset_mock() _member_started.return_value = False harness.charm.on.start.emit() _configure_patroni_on_unit.assert_called_once() + _restart_services_after_reboot.assert_called_once() assert isinstance(harness.model.unit.status, WaitingStatus) @@ -2417,70 +2434,104 @@ def test_set_primary_status_message(harness): harness.charm._set_primary_status_message() tc.assertIsInstance(harness.charm.unit.status, MaintenanceStatus) - @patch("charm.Patroni.update_patroni_restart_condition") - @patch("charm.Patroni.get_patroni_restart_condition") - @patch("charm.PostgresqlOperatorCharm._unit_ip") - def test_override_patroni_restart_condition( - self, _unit_ip, get_restart_condition, update_restart_condition + +def test_override_patroni_restart_condition(harness): + with ( + patch("charm.Patroni.update_patroni_restart_condition") as _update_restart_condition, + patch("charm.Patroni.get_patroni_restart_condition") as _get_restart_condition, + patch("charm.PostgresqlOperatorCharm._unit_ip") as _unit_ip, ): - get_restart_condition.return_value = "always" + _get_restart_condition.return_value = "always" # Do override without repeat_cause - assert self.charm.override_patroni_restart_condition("no") is True - get_restart_condition.assert_called_once() - update_restart_condition.assert_called_once_with("no") - get_restart_condition.reset_mock() - update_restart_condition.reset_mock() + assert harness.charm.override_patroni_restart_condition("no", None) is True + _get_restart_condition.assert_called_once() + _update_restart_condition.assert_called_once_with("no") + _get_restart_condition.reset_mock() + _update_restart_condition.reset_mock() - get_restart_condition.return_value = "no" + _get_restart_condition.return_value = "no" # Must not be overridden twice without repeat_cause - assert self.charm.override_patroni_restart_condition("on-failure") is False - get_restart_condition.assert_called_once() - update_restart_condition.assert_not_called() - get_restart_condition.reset_mock() - update_restart_condition.reset_mock() + assert harness.charm.override_patroni_restart_condition("on-failure", None) is False + _get_restart_condition.assert_called_once() + _update_restart_condition.assert_not_called() + _get_restart_condition.reset_mock() + _update_restart_condition.reset_mock() # Reset override - self.charm.restore_patroni_restart_condition() - update_restart_condition.assert_called_once_with("always") - update_restart_condition.reset_mock() + harness.charm.restore_patroni_restart_condition() + _update_restart_condition.assert_called_once_with("always") + _update_restart_condition.reset_mock() # Must not be reset twice - self.charm.restore_patroni_restart_condition() - update_restart_condition.assert_not_called() - update_restart_condition.reset_mock() + harness.charm.restore_patroni_restart_condition() + _update_restart_condition.assert_not_called() + _update_restart_condition.reset_mock() - get_restart_condition.return_value = "always" + _get_restart_condition.return_value = "always" # Do override with repeat_cause - assert self.charm.override_patroni_restart_condition("no", "test_charm") is True - get_restart_condition.assert_called_once() - update_restart_condition.assert_called_once_with("no") - get_restart_condition.reset_mock() - update_restart_condition.reset_mock() + assert harness.charm.override_patroni_restart_condition("no", "test_charm") is True + _get_restart_condition.assert_called_once() + _update_restart_condition.assert_called_once_with("no") + _get_restart_condition.reset_mock() + _update_restart_condition.reset_mock() - get_restart_condition.return_value = "no" + _get_restart_condition.return_value = "no" # Do re-override with repeat_cause - assert self.charm.override_patroni_restart_condition("on-success", "test_charm") is True - get_restart_condition.assert_called_once() - update_restart_condition.assert_called_once_with("on-success") - get_restart_condition.reset_mock() - update_restart_condition.reset_mock() + assert harness.charm.override_patroni_restart_condition("on-success", "test_charm") is True + _get_restart_condition.assert_called_once() + _update_restart_condition.assert_called_once_with("on-success") + _get_restart_condition.reset_mock() + _update_restart_condition.reset_mock() - get_restart_condition.return_value = "on-success" + _get_restart_condition.return_value = "on-success" # Must not be re-overridden with different repeat_cause assert ( - self.charm.override_patroni_restart_condition("on-failure", "test_not_charm") is False + harness.charm.override_patroni_restart_condition("on-failure", "test_not_charm") + is False ) - get_restart_condition.assert_called_once() - update_restart_condition.assert_not_called() - get_restart_condition.reset_mock() - update_restart_condition.reset_mock() + _get_restart_condition.assert_called_once() + _update_restart_condition.assert_not_called() + _get_restart_condition.reset_mock() + _update_restart_condition.reset_mock() # Reset override - self.charm.restore_patroni_restart_condition() - update_restart_condition.assert_called_once_with("always") - update_restart_condition.reset_mock() + harness.charm.restore_patroni_restart_condition() + _update_restart_condition.assert_called_once_with("always") + _update_restart_condition.reset_mock() + + +def test_restart_services_after_reboot(harness): + with ( + patch( + "backups.PostgreSQLBackups.start_stop_pgbackrest_service" + ) as _start_stop_pgbackrest_service, + patch("charm.Patroni.start_patroni") as _start_patroni, + patch( + "charm.PostgresqlOperatorCharm._unit_ip", + new_callable=PropertyMock(return_value="1.1.1.1"), + ) as _unit_ip, + ): + with harness.hooks_disabled(): + harness.update_relation_data( + harness.model.get_relation(PEER).id, + harness.charm.app.name, + {"members_ips": json.dumps([])}, + ) + harness.charm._restart_services_after_reboot() + _start_patroni.assert_not_called() + _start_stop_pgbackrest_service.assert_not_called() + + with harness.hooks_disabled(): + harness.update_relation_data( + harness.model.get_relation(PEER).id, + harness.charm.app.name, + {"members_ips": json.dumps([_unit_ip])}, + ) + harness.charm._restart_services_after_reboot() + _start_patroni.assert_called_once() + _start_stop_pgbackrest_service.assert_called_once() From 6adae6270495804ed582ea283e6ff181d8bb566f Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Fri, 12 Jul 2024 10:01:49 -0300 Subject: [PATCH 09/11] Update snap revisions Signed-off-by: Marcelo Henrique Neppel --- src/constants.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/constants.py b/src/constants.py index 81e561efa5..afe45932ba 100644 --- a/src/constants.py +++ b/src/constants.py @@ -39,7 +39,7 @@ ( POSTGRESQL_SNAP_NAME, { - "revision": {"aarch64": "117", "x86_64": "116"}, + "revision": {"aarch64": "119", "x86_64": "118"}, "channel": "14/edge/disable-pgbackrest-service", }, ) From 6dd04529419028a2f44dcb6b912c00e36e4855e0 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 15 Jul 2024 11:50:57 -0300 Subject: [PATCH 10/11] Update to the published revisions Signed-off-by: Marcelo Henrique Neppel --- src/constants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/constants.py b/src/constants.py index afe45932ba..f8756a52cf 100644 --- a/src/constants.py +++ b/src/constants.py @@ -39,8 +39,8 @@ ( POSTGRESQL_SNAP_NAME, { - "revision": {"aarch64": "119", "x86_64": "118"}, - "channel": "14/edge/disable-pgbackrest-service", + "revision": {"aarch64": "121", "x86_64": "120"}, + "channel": "14/stable", }, ) ] From be8e002acdf86ff43fc7ddc7b7db7d2b0ccf5d7a Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Mon, 15 Jul 2024 11:57:23 -0300 Subject: [PATCH 11/11] Update snap version in dependencies Signed-off-by: Marcelo Henrique Neppel --- src/dependency.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dependency.json b/src/dependency.json index fb9e081aaa..ae1e86d8e2 100644 --- a/src/dependency.json +++ b/src/dependency.json @@ -9,6 +9,6 @@ "dependencies": {}, "name": "charmed-postgresql", "upgrade_supported": "^14", - "version": "14.11" + "version": "14.12" } }