Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a primitive backup rotation mechanism #53

Merged
merged 1 commit into from
Jun 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,32 @@ jobs:
run: |
python -m pre_commit run --all-files --show-diff-on-failure

# CI job for the Python helper scripts: checks formatting and lint rules with Ruff, then runs pytest.
python:
  runs-on: ubuntu-latest

  steps:
    - uses: actions/checkout@v4

    - name: Setup Python
      uses: actions/setup-python@v4
      with:
        python-version: '3.12'

    - name: Install pytest / Ruff
      run: |
        python -m pip install pytest ruff

    - name: Run Python linter
      run: |
        ruff format --check
        ruff check

    # The tests invoke "rotate.py" by name, so the directory that contains it is put on PATH.
    - name: Run Python tests
      run: |
        pushd roles/postgres/files
        PATH=$PWD:$PATH pytest -v .
        popd

ansible:
runs-on: ubuntu-latest
steps:
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__pycache__
112 changes: 112 additions & 0 deletions roles/postgres/files/rotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/python

"""Rotate backup files in a given directory.

Implements a primitive FIFO backup rotation strategy by keeping N most recent backup files. The
order is defined by sorting the file names lexicographically: the files that appear later in
the sorted list are considered to be newer.
"""

import argparse
import dataclasses
import pathlib
import typing


@dataclasses.dataclass(order=True)
class Backup:
    """A backup file together with its size.

    Instances are ordered field by field (``path`` first, then ``size``), so a list of backups
    can be sorted directly.
    """

    # Location of the backup file.
    path: pathlib.Path
    # Size of the file, in bytes.
    size: int


class Args(typing.Protocol):
    """Static description of the parsed command line options."""

    # Number of most recent backup files to keep.
    keep: int
    # Directory that contains the backup files.
    dir: pathlib.Path
    # Glob pattern that selects the files to consider.
    pattern: str
    # True when the rotated files must actually be removed.
    no_dry_run: bool


def non_negative_int(str_value: str) -> int:
    """Convert a command line value to an integer that is zero or greater.

    Raises:
        ValueError: If the text is not a valid integer literal.
        argparse.ArgumentTypeError: If the parsed integer is negative.
    """
    parsed = int(str_value)
    if parsed >= 0:
        return parsed

    raise argparse.ArgumentTypeError(f"Value must be non-negative: {parsed} < 0")


def parse_args() -> Args:
    """Parse the command line options of the script.

    Both ``--keep`` and ``--dir`` are mandatory. Without a directory the value would be ``None``
    and the failure would only surface later as an ``AttributeError`` while scanning for files,
    so the parser rejects the invocation up front with a regular usage error instead.

    Returns:
        The parsed options (an ``argparse.Namespace`` typed as ``Args``).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "-n",
        "--keep",
        required=True,
        type=non_negative_int,
        help="Keep this many most recent backup files",
    )
    parser.add_argument(
        "-d",
        "--dir",
        required=True,
        type=pathlib.Path,
        help="Path to the directory with backup files",
    )
    parser.add_argument(
        "-p",
        "--pattern",
        type=str,
        default="*",
        help="Only consider files that match this glob pattern",
    )
    parser.add_argument(
        "--no-dry-run",
        action="store_true",
        help="Actually remove the rotated files",
    )

    namespace = parser.parse_args()
    # argparse returns an untyped Namespace; the cast only informs static type checkers.
    return typing.cast(Args, namespace)


def rotate(
    dir: pathlib.Path,
    keep: int,
    pattern: str = "*",
) -> tuple[list[Backup], list[Backup]]:
    """Scan a directory and return a pair of lists: files to be kept, and files to be removed.

    Only regular files that match ``pattern`` are considered. Nothing is deleted here.

    Args:
        dir: Directory to scan.
        keep: How many of the newest files to keep; must not be negative.
        pattern: Glob pattern, relative to ``dir``, that selects the files to consider.

    Returns:
        ``(to_keep, to_remove)``, each ordered from newest to oldest.

    Raises:
        ValueError: If ``keep`` is negative.
    """
    if keep < 0:
        # The slices below would treat a negative bound as an offset from the end of the list
        # and silently select the wrong files.
        raise ValueError(f"keep must be non-negative: {keep}")

    # Backups compare by path first, so a descending sort puts the newest file names first.
    backups = sorted(
        (
            Backup(path=entry, size=entry.stat().st_size)
            for entry in dir.glob(pattern)
            if entry.is_file()
        ),
        reverse=True,
    )

    return (backups[:keep], backups[keep:])


def cleanup(to_keep: list[Backup], to_remove: list[Backup], *, dry_run: bool = True):
    """Remove the rotated backup files and report how much disk space is used and freed.

    In dry run mode (the default) no file is touched and only the report is printed.
    """
    bytes_kept = sum(item.size for item in to_keep)
    bytes_freed = sum(item.size for item in to_remove)

    if not dry_run:
        for item in to_remove:
            item.path.unlink()
    else:
        print("Dry run. No changes will be made.\n")

    print(f"Used space: {len(to_keep)} files, {bytes_kept} bytes")
    print(f"Freed space: {len(to_remove)} files, {bytes_freed} bytes")


def main():
    """Entry point: parse the options, select the files to rotate and clean them up."""
    options = parse_args()
    kept, rotated = rotate(options.dir, options.keep, options.pattern)

    # Files are only deleted when --no-dry-run was given explicitly.
    cleanup(kept, rotated, dry_run=not options.no_dry_run)


if __name__ == "__main__":
    main()
125 changes: 125 additions & 0 deletions roles/postgres/files/test_rotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import pathlib
import subprocess

import pytest


# Names of the backup files created by the `backups` fixture. The list is kept in ascending
# lexicographic order because the tests compare it against a sorted directory listing.
EXPECTED_FILES = [
    "2024-06-27.tar.gz",
    "2024-06-28.tar.gz",
    "2024-06-29.tar.gz",
    "2024-06-30.tar.gz",
    "2024-07-01.tar.gz",
    "2024-07-02.tar.gz",
    "2024-07-03.tar.gz",
    "2024-07-08.tar.gz",
    "2024-07-09.tar.gz",
]


@pytest.fixture
def backups(tmp_path: pathlib.Path):
    """Fill a temporary directory with one file per expected backup name and return it."""
    for name in EXPECTED_FILES:
        target = tmp_path / name
        target.write_text(name)

    created = sorted(item.name for item in tmp_path.glob("*"))
    assert created == EXPECTED_FILES
    return tmp_path


@pytest.fixture
def no_backups(tmp_path: pathlib.Path):
    """Return a temporary directory after checking that it contains nothing."""
    present = sorted(item.name for item in tmp_path.glob("*"))
    assert present == []
    return tmp_path


def test_normal_invocation(backups: pathlib.Path):
    """Rotating with --keep 3 leaves only the three newest backups on disk."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_normal_invocation_is_idempotent(backups: pathlib.Path):
    """Running the same rotation three times in a row gives the same result as running it once."""
    command = ["rotate.py", "--no-dry-run", "--keep", "3", "--dir", backups]
    for _ in range(3):
        subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == [
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_dry_run_invocation(backups: pathlib.Path):
    """Without --no-dry-run the script must leave every file in place."""
    command = ["rotate.py", "--keep", "3", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == EXPECTED_FILES


def test_keep_zero(backups: pathlib.Path):
    """Keeping zero files removes every backup."""
    command = ["rotate.py", "--no-dry-run", "--keep", "0", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == []


def test_keep_negative(backups: pathlib.Path):
    """A negative --keep value makes the script exit with a non-zero status."""
    command = ["rotate.py", "--no-dry-run", "--keep", "-1", "--dir", backups]
    with pytest.raises(subprocess.CalledProcessError):
        subprocess.check_call(command)


def test_keep_more_than_files(backups: pathlib.Path):
    """Asking to keep more files than exist removes nothing."""
    command = ["rotate.py", "--no-dry-run", "--keep", "100", "--dir", backups]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == EXPECTED_FILES


def test_keep_pattern(backups):
    """Only files matching --pattern are rotated; everything else is left untouched."""
    command = [
        "rotate.py",
        "--no-dry-run",
        "--keep",
        "1",
        "--dir",
        backups,
        "--pattern",
        "2024-06*tar.gz",
    ]
    subprocess.check_call(command)

    # Of the June files only the newest one survives; the July files never matched the pattern.
    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == [
        "2024-06-30.tar.gz",
        "2024-07-01.tar.gz",
        "2024-07-02.tar.gz",
        "2024-07-03.tar.gz",
        "2024-07-08.tar.gz",
        "2024-07-09.tar.gz",
    ]


def test_keep_pattern_does_not_match_anything(backups):
    """A pattern that matches no file results in no file being removed."""
    command = [
        "rotate.py",
        "--no-dry-run",
        "--keep",
        "1",
        "--dir",
        backups,
        "--pattern",
        "2024-08*tar.gz",
    ]
    subprocess.check_call(command)

    remaining = sorted(item.name for item in backups.glob("*"))
    assert remaining == EXPECTED_FILES
7 changes: 7 additions & 0 deletions roles/postgres/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,13 @@
become: true
become_user: postgres

# Copies rotate.py to /usr/local/bin: executable and readable by everyone, writable by the owner only.
- name: Install the script for backup rotation
  ansible.builtin.copy:
    src: 'rotate.py'
    dest: '/usr/local/bin/rotate.py'
    mode: 'u=rwx,g=rx,o=rx'
  become: true

- name: Add a service template that allows creating backups of postgresql databases
ansible.builtin.template:
src: [email protected]
Expand Down
1 change: 1 addition & 0 deletions roles/postgres/templates/[email protected]
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ Group = postgres
WorkingDirectory = {{ postgres_backups_dir }}
ExecStartPre = +/usr/bin/chown postgres:postgres {{ postgres_backups_dir }}
ExecStart = /usr/bin/bash -c "/usr/bin/pg_dump --compress=9 --no-owner --format=p --file=%i_$(TZ=UTC date +%%Y%%m%%d-%%H%%M%%S).sql.gz %i"
# Rotate only the dumps of this instance, which ExecStart names "%i_<timestamp>.sql.gz". Every
# instance of this template writes into the same directory, and rotate.py ranks files by name, so
# an unfiltered rotation would rank by database name first and could discard all dumps of one
# database. The pattern is expanded by rotate.py itself, not by a shell.
# NOTE(review): rotate.py only reports what it would remove unless --no-dry-run is passed; confirm
# whether running it as a dry run here is intentional.
ExecStartPost = /usr/bin/python /usr/local/bin/rotate.py --keep 30 --dir {{ postgres_backups_dir }} --pattern %i_*.sql.gz
Loading