From ef0dea5633250d55b5c92811f08b9621593cf325 Mon Sep 17 00:00:00 2001
From: Adam Martin
Date: Thu, 9 Feb 2023 17:11:36 +0000
Subject: [PATCH 1/2] controller: don't request a new backup if we're already
 at the maximum
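
Replace the module-level sort_completed_backups() helper with a
Controller.completed_backups property and use it everywhere a sorted
list of completed backups is needed. On top of that, skip a scheduled
backup request when the number of completed backups already exceeds
backup_count_max. One backup over the max is still tolerated so that
rotation can proceed: _purge_old_backups() only drops one backup per
call, so with backup_count_max = 3 the completed count settles into a
3 -> 4 -> 3 cycle.

A rough sketch of that cycle (illustrative helper only, not part of
this patch; the real logic is spread across
_mark_periodic_backup_requested_if_interval_exceeded and
_purge_old_backups):

    def next_completed_count(completed: int, count_max: int, count_min: int) -> int:
        # The new guard: a backup is only requested while at or below the max,
        # so the completed count can reach at most count_max + 1.
        if completed <= count_max:
            completed += 1
        # The purge pass drops at most one backup per call and never goes
        # below count_min.
        if completed > count_min and completed > count_max:
            completed -= 1
        return completed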
---
 myhoard/controller.py   | 35 ++++++++++++++++++++++-------------
 test/test_controller.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/myhoard/controller.py b/myhoard/controller.py
index d3022ef..2866449 100644
--- a/myhoard/controller.py
+++ b/myhoard/controller.py
@@ -81,14 +81,6 @@ class RestoreOptions(TypedDict):
     target_time_approximate_ok: bool
 
 
-def sort_completed_backups(backups: List[Backup]) -> List[Backup]:
-    def key(backup):
-        assert backup["completed_at"] is not None
-        return backup["completed_at"]
-
-    return sorted((backup for backup in backups if backup["completed_at"]), key=key)
-
-
 class Controller(threading.Thread):
     """Main logic controller for the service. This drives the individual handlers like BackupStream,
     BinlogScanner and RestoreCoordinator as well as provides state info
@@ -268,6 +260,14 @@ def mark_backup_requested(
     def mode(self) -> Mode:
         return self.state["mode"]
 
+    @property
+    def completed_backups(self) -> List[Backup]:
+        def key(backup: Backup):
+            assert backup["completed_at"] is not None
+            return backup["completed_at"]
+
+        return sorted((backup for backup in self.state["backups"] if backup["completed_at"]), key=key)
+
     def restore_backup(
         self,
         *,
@@ -958,7 +958,7 @@ def _extend_binlog_stream_list(self):
         assert self.restore_coordinator is not None
         if not self.restore_coordinator.can_add_binlog_streams():
             return
-        backups = sort_completed_backups(self.state["backups"])
+        backups = self.completed_backups
         # If most recent current backup is not in the list of backups being restored then we're probably
         # restoring some old backup and don't want to automatically get latest changes
         if not any(bs["stream_id"] == backups[-1]["stream_id"] for bs in self.restore_coordinator.binlog_streams):
@@ -966,7 +966,7 @@
             return
         old_backups = [{"site": backup["site"], "stream_id": backup["stream_id"]} for backup in backups]
         self._refresh_backups_list()
-        backups = sort_completed_backups(self.state["backups"])
+        backups = self.completed_backups
         new_backups: List[BinlogStream] = [{"site": backup["site"], "stream_id": backup["stream_id"]} for backup in backups]
         if old_backups == new_backups:
             return
@@ -1182,6 +1182,16 @@
             )
             and (not most_recent_scheduled or time.time() - most_recent_scheduled >= half_backup_interval_s)
         ):
+            # If we already have more than the max, skip this one.
+            # We allow one over the max just to handle rotation.
+            if (length := len(self.completed_backups)) > (max_count := self.backup_settings["backup_count_max"]):
+                self.log.info(
+                    "Skipping automated backup request as we already have our maximum count of backups (%d > %d)",
+                    length,
+                    max_count,
+                )
+                return
+
             self.log.info(
                 "New normalized time %r differs from previous %r, adding new backup request",
                 normalized_backup_time,
@@ -1220,14 +1230,13 @@ def _process_removed_binlogs(self, binlogs):
             stream.remove_binlogs(binlogs)
 
     def _purge_old_backups(self):
-        purgeable = [backup for backup in self.state["backups"] if backup["completed_at"]]
+        purgeable = self.completed_backups
        if len(purgeable) <= self.backup_settings["backup_count_min"]:
             return
 
         # For simplicity only ever drop one backup here. This function
         # is called repeatedly so if there are for any reason more backups
         # to drop they will be dropped soon enough
-        purgeable = sort_completed_backups(purgeable)
         backup = purgeable[0]
         if not backup["closed_at"]:
             return
@@ -1610,7 +1619,7 @@ def _switch_basebackup_if_possible(self):
         # We're trying to restore a backup but that keeps on failing in the basebackup restoration phase.
         # If we have an older backup available try restoring that and play back all binlogs so that the
         # system should end up in the exact same state eventually.
-        backups = sort_completed_backups(self.state["backups"])
+        backups = self.completed_backups
         current_stream_id = self.state["restore_options"]["stream_id"]
         earlier_backup = None
         for backup in backups:
diff --git a/test/test_controller.py b/test/test_controller.py
index 385c079..47b21be 100644
--- a/test/test_controller.py
+++ b/test/test_controller.py
@@ -835,6 +835,48 @@ def maybe_flush_binlog():
     assert 8 <= len(seen_backups) <= 13
 
 
+def test_scheduled_backup_is_skipped_when_at_max(master_controller):
+    mcontroller, master = master_controller
+    # Backup every 3 seconds
+    mcontroller.backup_settings["backup_interval_minutes"] = 0.05
+    # Never delete backups if we don't have at least 2 no matter how old they are
+    mcontroller.backup_settings["backup_count_min"] = 2
+    # Delete backups if there are more than this even if the backup to delete is newer than max age
+    mcontroller.backup_settings["backup_count_max"] = 3
+    # Max age far higher than this test will ever reach
+    mcontroller.backup_settings["backup_age_days_max"] = 1
+
+    mcontroller.switch_to_active_mode()
+    mcontroller.start()
+
+    seen_backups = set()
+    highest_backup_count = 0
+    last_flush = [time.monotonic()]
+
+    def maybe_flush_binlog():
+        if time.monotonic() - last_flush[0] > 0.2:
+            with mysql_cursor(**master.connect_options) as cursor:
+                cursor.execute("FLUSH BINARY LOGS")
+            last_flush[0] = time.monotonic()
+
+    # Wait for 30 seconds and ensure the completed backup count stays between 3 and 4 (once 3 has been reached)
+    start_time = time.monotonic()
+    while time.monotonic() - start_time < 30:
+        maybe_flush_binlog()
+        for backup in mcontroller.state["backups"]:
+            seen_backups.add(backup["stream_id"])
+        completed_backups = [backup for backup in mcontroller.state["backups"] if backup["completed_at"]]
+        if len(completed_backups) > highest_backup_count:
+            highest_backup_count = len(completed_backups)
+        time.sleep(0.1)
+
+    assert highest_backup_count == 4
+
+    # We waited 30 seconds altogether and the backup interval is 3 seconds, so more backups than our
+    # max should have been taken: one extra at a time is allowed through to handle rotation.
+    assert 4 < len(seen_backups)
+
+
 def test_manual_backup_creation(master_controller):
     mcontroller = master_controller[0]
     # Never delete backups if we don't have at least 2 no matter how old they are

From 43f0136b3531bb0fe42d2fc0689dfcd7eb7fc61e Mon Sep 17 00:00:00 2001
From: Adam Martin
Date: Thu, 9 Feb 2023 17:49:04 +0000
Subject: [PATCH 2/2] controller: do not request a scheduled backup if we
 already have a recent one
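
Before requesting a scheduled backup, look at the completion time of
the newest completed backup and skip the request if that backup is
already newer than the current normalized backup time. This avoids
queuing a redundant backup right after one has just completed.

The two values are compared as ISO 8601 strings, so the comparison is
only meaningful when both sides use the same timezone representation.
A minimal sketch of the comparison (illustrative only, assuming
completed_at is an epoch timestamp and normalized_backup_time is the
UTC isoformat string returned by _current_normalized_backup_timestamp):

    import datetime

    def backup_is_recent_enough(completed_at: float, normalized_backup_time: str) -> bool:
        last_iso = datetime.datetime.fromtimestamp(completed_at, datetime.timezone.utc).isoformat()
        # Identically formatted UTC ISO strings sort lexicographically in chronological order
        return last_iso > normalized_backup_time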
---
 myhoard/controller.py | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/myhoard/controller.py b/myhoard/controller.py
index 2866449..a9f48d5 100644
--- a/myhoard/controller.py
+++ b/myhoard/controller.py
@@ -1152,6 +1152,16 @@ def _mark_periodic_backup_requested_if_interval_exceeded(self):
         normalized_backup_time = self._current_normalized_backup_timestamp()
         most_recent_scheduled = None
         last_normalized_backup_time = None
+        last_completed_backup_time = self.completed_backups[-1]["completed_at"] if self.completed_backups else None
+
+        # If we already have a recent enough backup, skip for now. completed_at is an epoch timestamp
+        # while normalized_backup_time is a UTC isoformat string, so the conversion must be timezone
+        # aware for the string comparison to be meaningful.
+        if last_completed_backup_time is not None:
+            last_completed = datetime.datetime.fromtimestamp(last_completed_backup_time, datetime.timezone.utc)
+            if last_completed.isoformat() > normalized_backup_time:
+                return
+
         if self.backup_streams:
             normalized_backup_times = [
                 stream.state["normalized_backup_time"]
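
Taken together: the guard from patch 1 caps how many completed backups
may exist before a scheduled request is made, while this patch also
skips the request whenever the newest completed backup is already newer
than the current normalized backup time. Both rely on the
completed_backups property introduced in patch 1; a minimal usage
sketch (assuming a running Controller instance named controller):

    # completed_backups is sorted oldest-to-newest by completed_at
    backups = controller.completed_backups
    newest = backups[-1] if backups else None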