Skip to content

Commit

Permalink
Merge pull request #53 from xsnippet/rotate-backups
Browse files Browse the repository at this point in the history
Add a primitive backup rotation mechanism
  • Loading branch information
malor authored Jun 30, 2024
2 parents ded4aec + 061a196 commit 7e11fcd
Show file tree
Hide file tree
Showing 6 changed files with 272 additions and 0 deletions.
26 changes: 26 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ jobs:
run: |
python -m pre_commit run --all-files --show-diff-on-failure
python:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v4

- name: Setup Python
uses: actions/setup-python@v4
with:
python-version: '3.12'

- name: Install pytest / Ruff
run: |
python -m pip install pytest ruff
- name: Run Python linter
run: |
ruff format --check
ruff check
- name: Run Python tests
run: |
pushd roles/postgres/files
PATH=$PWD:$PATH pytest -v .
popd
ansible:
runs-on: ubuntu-latest
steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
112 changes: 112 additions & 0 deletions roles/postgres/files/rotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/python

"""Rotate backup files in a given directory.
Implements a primitive FIFO backup rotation strategy by keeping N most recent backup files. The
order is defined by sorting the file names lexicographically: the files that appear later in
the sorted list are considered to be newer.
"""

import argparse
import dataclasses
import pathlib
import typing


@dataclasses.dataclass(order=True)
class Backup:
    """A single backup file together with its size.

    ``order=True`` makes instances comparable field by field in declaration order, so ``path``
    is the primary sort key and ``size`` is only consulted when two paths compare equal.
    """

    # Location of the backup file.
    path: pathlib.Path
    # Size of the file in bytes.
    size: int


class Args(typing.Protocol):
    """Structural type describing the parsed command-line options."""

    # Directory that is scanned for backup files (``-d`` / ``--dir``).
    dir: pathlib.Path
    # Glob pattern a file must match to be considered (``-p`` / ``--pattern``).
    pattern: str
    # Number of most recent backup files to keep (``-n`` / ``--keep``).
    keep: int
    # True when ``--no-dry-run`` was given, i.e. rotated files should really be removed.
    no_dry_run: bool


def non_negative_int(str_value: str) -> int:
    """Convert a command-line string to an integer that is zero or greater.

    Intended for use as an argparse ``type=`` callable.

    Raises:
        ValueError: If the string is not a valid integer literal.
        argparse.ArgumentTypeError: If the parsed integer is negative.
    """
    parsed = int(str_value)
    if parsed >= 0:
        return parsed

    raise argparse.ArgumentTypeError(f"Value must be non-negative: {parsed} < 0")


def parse_args(argv: typing.Optional[typing.Sequence[str]] = None) -> Args:
    """Parse the command-line options of the backup rotation script.

    Args:
        argv: Argument strings to parse. When ``None`` (the default), argparse reads them
            from ``sys.argv[1:]``.

    Returns:
        The parsed argparse namespace, typed as ``Args``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-n",
        "--keep",
        required=True,
        type=non_negative_int,
        help="Keep this many most recent backup files",
    )
    parser.add_argument(
        "-d",
        "--dir",
        # There is nothing to scan without a directory: fail with a usage error up front
        # rather than handing ``None`` to the code that expects a ``pathlib.Path``.
        required=True,
        type=pathlib.Path,
        help="Path to the directory with backup files",
    )
    parser.add_argument(
        "-p",
        "--pattern",
        type=str,
        default="*",
        help="Only consider files that match this glob pattern",
    )
    parser.add_argument(
        "--no-dry-run",
        action="store_true",
        help="Actually remove the rotated files",
    )

    namespace = parser.parse_args(argv)
    return typing.cast(Args, namespace)


def rotate(
    dir: pathlib.Path,
    keep: int,
    pattern: str = "*",
) -> tuple[list[Backup], list[Backup]]:
    """Scan a directory and return a pair of lists: files to be kept, and files to be removed.

    Only regular files matching ``pattern`` are considered. They are sorted in descending
    order, so the first ``keep`` entries are the ones that sort last, i.e. the newest.

    Args:
        dir: Directory to scan.
        keep: How many of the most recent backups to keep; must not be negative.
        pattern: Glob pattern, relative to ``dir``, that a file must match.

    Returns:
        A ``(to_keep, to_remove)`` tuple, each list ordered newest first.

    Raises:
        ValueError: If ``keep`` is negative.
    """
    # The slices below would read a negative value as "count from the end" and silently
    # select the wrong files for removal, so reject it explicitly.
    if keep < 0:
        raise ValueError(f"keep must be non-negative: {keep} < 0")

    backups = sorted(
        (
            Backup(path=entry, size=entry.stat().st_size)
            for entry in dir.glob(pattern)
            if entry.is_file()
        ),
        reverse=True,
    )

    return (backups[:keep], backups[keep:])


def cleanup(to_keep: list[Backup], to_remove: list[Backup], *, dry_run: bool = True) -> None:
    """Delete old backup files and print disk space usage stats.

    Args:
        to_keep: Backups that stay on disk; only used for the usage summary.
        to_remove: Backups to delete.
        dry_run: When true (the default), nothing is deleted and a notice is printed instead.
    """
    used_space = sum(backup.size for backup in to_keep)
    freed_space = sum(backup.size for backup in to_remove)

    if dry_run:
        print("Dry run. No changes will be made.\n")
    else:
        for backup in to_remove:
            # The file may have disappeared since the directory was scanned (e.g. removed by
            # a concurrent rotation run). It being gone is the desired end state, so do not
            # abort the rest of the cleanup because of it.
            backup.path.unlink(missing_ok=True)

    print(f"Used space: {len(to_keep)} files, {used_space} bytes")
    print(f"Freed space: {len(to_remove)} files, {freed_space} bytes")


def main():
    """Script entry point: parse the options, pick the files to rotate, and clean them up."""
    options = parse_args()

    kept, removed = rotate(options.dir, options.keep, options.pattern)
    # Deleting is opt-in: without ``--no-dry-run`` the cleanup step only reports.
    cleanup(kept, removed, dry_run=not options.no_dry_run)


if __name__ == "__main__":
    main()
125 changes: 125 additions & 0 deletions roles/postgres/files/test_rotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pathlib
import subprocess

import pytest


# Names of the files the `backups` fixture creates, listed in ascending lexicographic order
# (the tests compare this list against sorted directory listings). Note that the dates are
# not contiguous: there is a gap between 2024-07-03 and 2024-07-08.
EXPECTED_FILES = [
"2024-06-27.tar.gz",
"2024-06-28.tar.gz",
"2024-06-29.tar.gz",
"2024-06-30.tar.gz",
"2024-07-01.tar.gz",
"2024-07-02.tar.gz",
"2024-07-03.tar.gz",
"2024-07-08.tar.gz",
"2024-07-09.tar.gz",
]


@pytest.fixture
def backups(tmp_path: pathlib.Path):
    """Populate a temporary directory with every file from EXPECTED_FILES and return it.

    Each file contains its own name as text.
    """
    for name in EXPECTED_FILES:
        tmp_path.joinpath(name).write_text(name)

    # Sanity check: the directory holds exactly the expected files before a test runs.
    created = sorted(entry.name for entry in tmp_path.glob("*"))
    assert created == EXPECTED_FILES
    return tmp_path


@pytest.fixture
def no_backups(tmp_path: pathlib.Path):
    """Return a temporary directory after checking that it contains no entries."""
    present = sorted(entry.name for entry in tmp_path.glob("*"))
    assert present == []
    return tmp_path


def test_normal_invocation(backups: pathlib.Path):
    """With ``--no-dry-run --keep 3`` only the three newest backups remain."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_normal_invocation_is_idempotent(backups: pathlib.Path):
    """Running the same rotation three times in a row leaves the same three newest backups."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    for _ in range(3):
        subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_dry_run_invocation(backups: pathlib.Path):
    """Without ``--no-dry-run`` the script must not remove any file."""
    command = ["rotate.py", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == EXPECTED_FILES


def test_keep_zero(backups: pathlib.Path):
    """``--keep 0`` removes every backup file."""
    command = ["rotate.py", "--no-dry-run", "--keep", "0", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == []


def test_keep_negative(backups: pathlib.Path):
    """A negative ``--keep`` value makes the script exit with a non-zero status."""
    command = ["rotate.py", "--no-dry-run", "--keep", "-1", "--dir", backups]

    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(command)


def test_keep_more_than_files(backups: pathlib.Path):
    """Asking to keep more files than exist removes nothing."""
    command = ["rotate.py", "--no-dry-run", "--keep", "100", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == EXPECTED_FILES


def test_keep_pattern(backups: pathlib.Path):
    """Only files matching ``--pattern`` take part in the rotation.

    With the ``2024-06*tar.gz`` pattern and ``--keep 1``, the newest June backup stays, the
    older June backups are removed, and the July backups are left untouched.
    """
    subprocess.check_call(
        [
            "rotate.py",
            "--no-dry-run",
            "--keep",
            "1",
            "--dir",
            backups,
            "--pattern",
            "2024-06*tar.gz",
        ]
    )
    assert sorted(item.name for item in backups.glob("*")) == [
        "2024-06-30.tar.gz",
        "2024-07-01.tar.gz",
        "2024-07-02.tar.gz",
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_keep_pattern_does_not_match_anything(backups: pathlib.Path):
    """A pattern that matches no file leaves the directory unchanged."""
    subprocess.check_call(
        [
            "rotate.py",
            "--no-dry-run",
            "--keep",
            "1",
            "--dir",
            backups,
            "--pattern",
            "2024-08*tar.gz",
        ]
    )
    assert sorted(item.name for item in backups.glob("*")) == EXPECTED_FILES
7 changes: 7 additions & 0 deletions roles/postgres/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@
become: true
become_user: postgres

- name: Install the script for backup rotation
ansible.builtin.copy:
src: 'rotate.py'
dest: '/usr/local/bin/rotate.py'
mode: 'u=rwx,g=rx,o=rx'
become: true

- name: Add a service template that allows creating backups of postgresql databases
ansible.builtin.template:
src: [email protected]
Expand Down
1 change: 1 addition & 0 deletions roles/postgres/templates/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ Group = postgres
WorkingDirectory = {{ postgres_backups_dir }}
ExecStartPre = +/usr/bin/chown postgres:postgres {{ postgres_backups_dir }}
ExecStart = /usr/bin/bash -c "/usr/bin/pg_dump --compress=9 --no-owner --format=p --file=%i_$(TZ=UTC date +%%Y%%m%%d-%%H%%M%%S).sql.gz %i"
# NOTE(review): rotate.py only deletes files when --no-dry-run is passed; as invoked here it
# just prints the usage stats. Its default pattern also matches every file in the directory,
# so the 30 kept files are counted across all database instances rather than per instance.
# Confirm that both are intended.
ExecStartPost = /usr/bin/python /usr/local/bin/rotate.py --keep 30 --dir {{ postgres_backups_dir }}

0 comments on commit 7e11fcd

Please sign in to comment.