Skip to content

Commit

Permalink
ref(backup): Do deepcopy before validation
Browse files Browse the repository at this point in the history
Previously, all validation mutated the inputs in-place. This was
performant (no need to make deep copies of two potentially large JSON
blobs), but potentially bug prone if not held carefully, since the
validation itself mutated the inputs via JSONScrubbingComparators. This
means that the post-comparison input objects MUST NOT be re-used, since
they are meaningfully different from the inputs that were declared
valid.

Instead, we now do deepcopies under the hood, leaving the originally
passed-in inputs untouched.

Issue: getsentry/team-ospo#156
  • Loading branch information
azaslavsky committed Jul 21, 2023
1 parent dc8dfc1 commit 3a3515e
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
13 changes: 11 additions & 2 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from copy import deepcopy
from datetime import datetime, timedelta, timezone
from difflib import unified_diff
from io import StringIO
Expand Down Expand Up @@ -159,10 +160,13 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> Comparator


def validate(
expect: JSONData, actual: JSONData, comparators: ComparatorMap = DEFAULT_COMPARATORS
expect: JSONData,
actual: JSONData,
comparators: ComparatorMap = DEFAULT_COMPARATORS,
) -> ComparatorFindings:
"""Ensures that originally imported data correctly matches actual outputted data, and produces a
list of reasons why not when it doesn't"""
list of reasons why not when it doesn't.
"""

def json_lines(obj: JSONData) -> list[str]:
"""Take a JSONData object and pretty-print it as JSON."""
Expand All @@ -176,6 +180,11 @@ def json_lines(obj: JSONData) -> list[str]:
id = InstanceID(model["model"], model["pk"])
exp_models[id] = model

# Because we may be scrubbing data from the objects as we compare them, we may (optionally) make
# deep copies to start to avoid potentially mangling the input data.
expect = deepcopy(expect)
actual = deepcopy(actual)

# Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not.
for model in actual:
id = InstanceID(model["model"], model["pk"])
Expand Down
10 changes: 4 additions & 6 deletions tests/sentry/backup/test_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@


def import_export_then_validate(
tmp_path: Path, fixture_file_name: str, map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING
tmp_path: Path,
fixture_file_name: str,
map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING,
) -> None:
"""Test helper that validates that data imported from a fixture `.json` file correctly matches
the actual outputted export data."""
Expand All @@ -34,11 +36,7 @@ def import_export_then_validate(
rv = CliRunner().invoke(import_, [str(fixture_file_path)])
assert rv.exit_code == 0, rv.output

res = validate(
expect,
tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")),
map,
)
res = validate(expect, tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")), map)
if res.findings:
raise ValidationError(res)

Expand Down

0 comments on commit 3a3515e

Please sign in to comment.