diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 9255017..5d7a4a1 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -33,6 +33,32 @@ jobs:
         run: |
           python -m pre_commit run --all-files --show-diff-on-failure
 
+  python:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
+      - name: Install pytest / Ruff
+        run: |
+          python -m pip install pytest ruff
+
+      - name: Run Python linter
+        run: |
+          ruff format --check
+          ruff check
+
+      - name: Run Python tests
+        run: |
+          pushd roles/postgres/files
+          PATH=$PWD:$PATH pytest -v .
+          popd
+
   ansible:
     runs-on: ubuntu-latest
     steps:
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..bee8a64
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__
diff --git a/roles/postgres/files/rotate.py b/roles/postgres/files/rotate.py
new file mode 100755
index 0000000..e533114
--- /dev/null
+++ b/roles/postgres/files/rotate.py
@@ -0,0 +1,139 @@
+#!/usr/bin/python
+
+"""Rotate backup files in a given directory.
+
+Implements a primitive FIFO backup rotation strategy by keeping N most recent backup files. The
+order is defined by sorting the file names lexicographically: the files that appear later in
+the sorted list are considered to be newer.
+
+The script runs in dry-run mode by default and only prints the stats; pass --no-dry-run to
+actually delete the rotated files.
+"""
+
+import argparse
+import dataclasses
+import pathlib
+import typing
+
+
+@dataclasses.dataclass(order=True)
+class Backup:
+    """A backup file found on disk, together with its size in bytes.
+
+    Instances are ordered by path first, then by size.
+    """
+
+    path: pathlib.Path
+    size: int
+
+
+class Args(typing.Protocol):
+    """The typed view of the parsed command line arguments."""
+
+    keep: int
+    dir: pathlib.Path
+    pattern: str
+    no_dry_run: bool
+
+
+def non_negative_int(str_value: str) -> int:
+    """Parse a command line value as an integer and reject negative numbers."""
+
+    value = int(str_value)
+    if value < 0:
+        raise argparse.ArgumentTypeError(f"Value must be non-negative: {value} < 0")
+
+    return value
+
+
+def parse_args() -> Args:
+    """Parse the command line arguments of the script."""
+
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "-n",
+        "--keep",
+        required=True,
+        type=non_negative_int,
+        help="Keep this many most recent backup files",
+    )
+    # Without an explicit directory there is nothing to scan: fail with a usage error
+    # instead of crashing later on `None.glob(...)`.
+    parser.add_argument(
+        "-d",
+        "--dir",
+        required=True,
+        type=pathlib.Path,
+        help="Path to the directory with backup files",
+    )
+    parser.add_argument(
+        "-p",
+        "--pattern",
+        type=str,
+        default="*",
+        help="Only consider files that match this glob pattern",
+    )
+    parser.add_argument(
+        "--no-dry-run",
+        action="store_true",
+        help="Actually remove the rotated files",
+    )
+
+    namespace = parser.parse_args()
+    return typing.cast(Args, namespace)
+
+
+def rotate(
+    dir: pathlib.Path,
+    keep: int,
+    pattern: str = "*",
+) -> tuple[list[Backup], list[Backup]]:
+    """Scan a directory and return a pair of lists: files to be kept, and files to be removed."""
+
+    # Newest first: names that sort later are considered to be more recent.
+    backups = sorted(
+        (
+            Backup(path=entry, size=entry.stat().st_size)
+            for entry in dir.glob(pattern)
+            if entry.is_file()
+        ),
+        reverse=True,
+    )
+
+    to_keep = backups[:keep]
+    to_remove = backups[keep:]
+
+    return (to_keep, to_remove)
+
+
+def cleanup(to_keep: list[Backup], to_remove: list[Backup], *, dry_run: bool = True):
+    """Delete old backup files and print disk space usage stats.
+
+    Nothing is deleted when `dry_run` is true; the stats are printed in both cases.
+    """
+
+    used_space = sum(backup.size for backup in to_keep)
+    freed_space = sum(backup.size for backup in to_remove)
+
+    if dry_run:
+        print("Dry run. No changes will be made.\n")
+    else:
+        for backup in to_remove:
+            # The file may already be gone, e.g. removed by a concurrent run.
+            backup.path.unlink(missing_ok=True)
+
+    print(f"Used space: {len(to_keep)} files, {used_space} bytes")
+    print(f"Freed space: {len(to_remove)} files, {freed_space} bytes")
+
+
+def main():
+    """Parse the command line, pick the files to rotate, and remove them."""
+
+    args = parse_args()
+
+    to_keep, to_remove = rotate(args.dir, args.keep, args.pattern)
+    cleanup(to_keep, to_remove, dry_run=not args.no_dry_run)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/roles/postgres/files/test_rotate.py b/roles/postgres/files/test_rotate.py
new file mode 100644
index 0000000..025a5c5
--- /dev/null
+++ b/roles/postgres/files/test_rotate.py
@@ -0,0 +1,125 @@
+import pathlib
+import subprocess
+
+import pytest
+
+
+EXPECTED_FILES = [
+    "2024-06-27.tar.gz",
+    "2024-06-28.tar.gz",
+    "2024-06-29.tar.gz",
+    "2024-06-30.tar.gz",
+    "2024-07-01.tar.gz",
+    "2024-07-02.tar.gz",
+    "2024-07-03.tar.gz",
+    "2024-07-08.tar.gz",
+    "2024-07-09.tar.gz",
+]
+
+
+@pytest.fixture
+def backups(tmp_path: pathlib.Path):
+    for filename in EXPECTED_FILES:
+        (tmp_path / filename).write_text(filename)
+
+    assert sorted(item.name for item in tmp_path.glob("*")) == EXPECTED_FILES
+    return tmp_path
+
+
+@pytest.fixture
+def no_backups(tmp_path: pathlib.Path):
+    assert sorted(item.name for item in tmp_path.glob("*")) == []
+    return tmp_path
+
+
+def test_normal_invocation(backups: pathlib.Path):
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == [
+        "2024-07-03.tar.gz",
+        "2024-07-08.tar.gz",
+        "2024-07-09.tar.gz",
+    ]
+
+
+def test_normal_invocation_is_idempotent(backups: pathlib.Path):
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
+    )
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
+    )
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == [
+        "2024-07-03.tar.gz",
+        "2024-07-08.tar.gz",
+        "2024-07-09.tar.gz",
+    ]
+
+
+def test_dry_run_invocation(backups: pathlib.Path):
+    subprocess.check_call(["rotate.py", "--keep", "3", "--dir", backups])
+    assert sorted(item.name for item in backups.glob("*")) == EXPECTED_FILES
+
+
+def test_keep_zero(backups: pathlib.Path):
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "0", "--dir", backups]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == []
+
+
+def test_keep_negative(backups: pathlib.Path):
+    with pytest.raises(subprocess.CalledProcessError):
+        subprocess.check_call(
+            ["rotate.py", "--no-dry-run", "--keep", "-1", "--dir", backups]
+        )
+
+
+def test_keep_more_than_files(backups: pathlib.Path):
+    subprocess.check_call(
+        ["rotate.py", "--no-dry-run", "--keep", "100", "--dir", backups]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == EXPECTED_FILES
+
+
+def test_keep_pattern(backups):
+    subprocess.check_call(
+        [
+            "rotate.py",
+            "--no-dry-run",
+            "--keep",
+            "1",
+            "--dir",
+            backups,
+            "--pattern",
+            "2024-06*tar.gz",
+        ]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == [
+        "2024-06-30.tar.gz",
+        "2024-07-01.tar.gz",
+        "2024-07-02.tar.gz",
+        "2024-07-03.tar.gz",
+        "2024-07-08.tar.gz",
+        "2024-07-09.tar.gz",
+    ]
+
+
+def test_keep_pattern_does_not_match_anything(backups):
+    subprocess.check_call(
+        [
+            "rotate.py",
+            "--no-dry-run",
+            "--keep",
+            "1",
+            "--dir",
+            backups,
+            "--pattern",
+            "2024-08*tar.gz",
+        ]
+    )
+    assert sorted(item.name for item in backups.glob("*")) == EXPECTED_FILES
diff --git a/roles/postgres/tasks/main.yml b/roles/postgres/tasks/main.yml
index 4e16a3c..2513b8e 100644
--- a/roles/postgres/tasks/main.yml
+++ b/roles/postgres/tasks/main.yml
@@ -68,6 +68,13 @@
   become: true
   become_user: postgres
 
+- name: Install the script for backup rotation
+  ansible.builtin.copy:
+    src: 'rotate.py'
+    dest: '/usr/local/bin/rotate.py'
+    mode: 'u=rwx,g=rx,o=rx'
+  become: true
+
 - name: Add a service template that allows creating backups of postgresql databases
   ansible.builtin.template:
     src: postgres-backups@.service.j2
diff --git a/roles/postgres/templates/postgres-backups@.service.j2 b/roles/postgres/templates/postgres-backups@.service.j2
index 6ce666d..bd6042c 100644
--- a/roles/postgres/templates/postgres-backups@.service.j2
+++ b/roles/postgres/templates/postgres-backups@.service.j2
@@ -9,3 +9,5 @@ Group = postgres
 WorkingDirectory = {{ postgres_backups_dir }}
 ExecStartPre = +/usr/bin/chown postgres:postgres {{ postgres_backups_dir }}
 ExecStart = /usr/bin/bash -c "/usr/bin/pg_dump --compress=9 --no-owner --format=p --file=%i_$(TZ=UTC date +%%Y%%m%%d-%%H%%M%%S).sql.gz %i"
+# Rotate only the dumps of this instance (the directory may hold dumps of other instances); rotate.py deletes nothing without --no-dry-run.
+ExecStartPost = /usr/bin/python /usr/local/bin/rotate.py --no-dry-run --keep 30 --dir {{ postgres_backups_dir }} --pattern "%i_[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]-[0-9][0-9][0-9][0-9][0-9][0-9].sql.gz"