feat(backup): Support comparators

This change introduces the idea of comparators, or special handlers for comparing JSON fields on exported models that cannot be tested for simple stringh equivalency. The first of these comparators, the `DateUpdatedComparator`, is added to ensure that comparisons on models that have an audit timetamp incremented every time they are mutated are still able to succeed.
getsentry · Jul 11, 2023 · 4500d9b · 4500d9b
1 parent c1b9df4
commit 4500d9b
Show file tree

Hide file tree

Showing 2 changed files with 185 additions and 18 deletions.
diff --git a/src/sentry/runner/commands/backup.py b/src/sentry/runner/commands/backup.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
+from abc import ABC, abstractmethod
 from difflib import unified_diff
 from io import StringIO
-from typing import NamedTuple, NewType
+from typing import Dict, List, NamedTuple, NewType
 
 import click
+from dateutil import parser
 from django.apps import apps
 from django.core import management, serializers
 from django.db import IntegrityError, connection, transaction
@@ -17,15 +19,14 @@
     default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
 )
 
-ComparatorName = NewType("ComparatorName", str)
-ModelName = NewType("ModelName", str)
+ComparatorSlug = NewType("ComparatorSlug", str)
 
 
 # TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
 class InstanceID(NamedTuple):
     """Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""
 
-    model: ModelName
+    model: str
     pk: int
 
     def pretty(self) -> str:
@@ -35,7 +36,7 @@ def pretty(self) -> str:
 class ComparatorFinding(NamedTuple):
     """Store all information about a single failed matching between expected and actual output."""
 
-    name: ComparatorName
+    name: ComparatorSlug
     on: InstanceID
     reason: str = ""
 
@@ -56,7 +57,90 @@ def pretty(self) -> str:
         return "\n".join(f.pretty() for f in self.findings)
 
 
-def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
+class JSONScrubbingComparator(ABC):
+    """An abstract class that compares and then scrubs some set of fields that, by a more nuanced
+    definition that mere strict byte-for-byte equality, are expected to maintain some relation on
+    otherwise equivalent JSON instances of the same model.
+
+    Each class inheriting from `JSONScrubbingComparator` should override the abstract `compare`
+    method with its own comparison logic. The `scrub` method is universal (it merely moves the
+    compared fields from the `fields` dictionary to the non-diffed `scrubbed` dictionary).
+
+    If multiple comparators are used sequentially on a single model (see the `SCRUBBING_COMPARATORS`
+    dict below for specific mappings), all of the `compare(...)` methods are called before any of
+    the `scrub(...)` methods are. This ensures that comparators that touch the same fields do not
+    have their inputs mangled by one another."""
+
+    def __init__(self, fields: list[str]):
+        self.fields = fields
+
+    def check(self, side: str, data: JSONData) -> None:
+        """Ensure that we have received valid JSON data at runtime."""
+
+        if "model" not in data or not isinstance(data["model"], str):
+            raise RuntimeError(f"The {side} input must have a `model` string assigned to it.")
+        if "pk" not in data or not isinstance(data["pk"], int):
+            raise RuntimeError(f"The {side} input must have a numerical `pk` entry.")
+        if "fields" not in data or not isinstance(data["fields"], dict):
+            raise RuntimeError(f"The {side} input must have a `fields` dictionary.")
+
+    @abstractmethod
+    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
+        """An abstract method signature, to be implemented by inhering classes with their own comparison logic."""
+
+        pass
+
+    def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
+        """Removes all of the fields compared by this comparator from the `fields` dict, so that the remaining fields may be compared for equality."""
+
+        self.check("left", left)
+        self.check("right", right)
+        if "scrubbed" not in left:
+            left["scrubbed"] = {}
+        if "scrubbed" not in right:
+            right["scrubbed"] = {}
+        for field in self.fields:
+            del left["fields"][field]
+            left["scrubbed"][f"{self.slug()}::{field}"] = True
+            del right["fields"][field]
+            right["scrubbed"][f"{self.slug()}::{field}"] = True
+
+    def slug(self) -> ComparatorSlug:
+        """A unique identifier for this particular derivation of JSONScrubbingComparator, which will be bubbled up in ComparatorFindings when they are generated."""
+
+        return self.__class__.__name__
+
+
+class DateUpdatedComparator(JSONScrubbingComparator):
+    """Comparator that ensures that the specified field's value on the right input is an ISO-8601 date that is greater than (ie, occurs after) the specified field's left input."""
+
+    def __init__(self, field: str):
+        super().__init__([field])
+        self.field = field
+
+    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
+        left_date_updated = left["fields"][self.field]
+        right_date_updated = right["fields"][self.field]
+        if parser.parse(left_date_updated) > parser.parse(right_date_updated):
+            return ComparatorFinding(
+                name=self.slug(),
+                on=on,
+                reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not less
+                than or equal to the right ({right_date_updated})""",
+            )
+
+
+ComparatorList = List[JSONScrubbingComparator]
+ComparatorMap = Dict[str, ComparatorList]
+DEFAULT_COMPARATORS: ComparatorMap = {
+    "sentry.userrole": [DateUpdatedComparator("date_updated")],
+    "sentry.userroleuser": [DateUpdatedComparator("date_updated")],
+}
+
+
+def validate(
+    expect: JSONData, actual: JSONData, comparators: ComparatorMap = DEFAULT_COMPARATORS
+) -> ComparatorFindings:
     """Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""
 
     def json_lines(obj: JSONData) -> list[str]:
@@ -75,7 +159,7 @@ def json_lines(obj: JSONData) -> list[str]:
     for model in actual:
         id = InstanceID(model["model"], model["pk"])
         if id in act_models:
-            findings.append(ComparatorFinding("duplicate_entry", id))
+            findings.append(ComparatorFinding("DuplicateEntry", id))
         else:
             act_models[id] = model
 
@@ -84,20 +168,29 @@ def json_lines(obj: JSONData) -> list[str]:
     missing = sorted(exp_models.keys() - act_models.keys())
     for id in extra:
         del act_models[id]
-        findings.append(ComparatorFinding("unexpected_entry", id))
+        findings.append(ComparatorFinding("UnexpectedEntry", id))
     for id in missing:
         del exp_models[id]
-        findings.append(ComparatorFinding("missing_entry", id))
+        findings.append(ComparatorFinding("MissingEntry", id))
 
     # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
     # outputs.
     for id, act in act_models.items():
         exp = exp_models[id]
 
+        # Try comparators applicable for this specific model.
+        if id.model in comparators:
+            for cmp in comparators[id.model]:
+                res = cmp.compare(id, exp, act)
+                if res:
+                    findings.append(ComparatorFinding(cmp.slug(), id, res))
+                else:
+                    cmp.scrub(id, exp, act)
+
         # Finally, perform a diff on the remaining JSON.
         diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
         if diff:
-            findings.append(ComparatorFinding("json_diff", id, "\n    " + "\n    ".join(diff)))
+            findings.append(ComparatorFinding("UnequalJSON", id, "\n    " + "\n    ".join(diff)))
 
     return findings
 

diff --git a/tests/sentry/backup/test_correctness.py b/tests/sentry/backup/test_correctness.py
@@ -2,17 +2,39 @@
 
 import pytest
 from click.testing import CliRunner
-from freezegun import freeze_time
 
 from sentry.db.postgres.roles import in_test_psql_role_override
-from sentry.runner.commands.backup import import_, validate
+from sentry.runner.commands.backup import (
+    DEFAULT_COMPARATORS,
+    ComparatorMap,
+    DateUpdatedComparator,
+    InstanceID,
+    import_,
+    validate,
+)
 from sentry.testutils.factories import get_fixture_path
 from sentry.utils import json
 from sentry.utils.pytest.fixtures import django_db_all
 from tests.sentry.backup import ValidationError, tmp_export_to_file
 
 
-def import_export_then_validate(tmp_path: Path, fixture_file_name: str) -> None:
+def django_db_complete_reset(func=None):
+    """Pytest decorator for resetting all databases, including pk sequences"""
+
+    if func is not None:
+        return pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")(
+            func
+        )
+
+    def decorator(function):
+        return pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")(
+            function
+        )
+
+    return decorator
+
+
+def import_export_then_validate(tmp_path: Path, fixture_file_name: str, map: ComparatorMap) -> None:
     """Test helper that validates that data imported from a fixture `.json` file correctly matches
     the actual outputted export data."""
 
@@ -24,19 +46,71 @@ def import_export_then_validate(tmp_path: Path, fixture_file_name: str) -> None:
         rv = CliRunner().invoke(import_, [str(fixture_file_path)])
         assert rv.exit_code == 0, rv.output
 
-    res = validate(expect, tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")))
+    res = validate(
+        expect,
+        tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")),
+        map,
+    )
     if res.findings:
         raise ValidationError(res)
 
 
 @django_db_all(transaction=True, reset_sequences=True)
-@freeze_time("2023-06-22T23:00:00.123Z")
 def test_good_fresh_install_validation(tmp_path):
-    import_export_then_validate(tmp_path, "fresh-install.json")
+    import_export_then_validate(tmp_path, "fresh-install.json", DEFAULT_COMPARATORS)
 
 
 @django_db_all(transaction=True, reset_sequences=True)
 def test_bad_fresh_install_validation(tmp_path):
+
     with pytest.raises(ValidationError) as excinfo:
-        import_export_then_validate(tmp_path, "fresh-install.json")
-    assert len(excinfo.value.info.findings) == 2
+        import_export_then_validate(tmp_path, "fresh-install.json", {})
+    info = excinfo.value.info
+    assert len(info.findings) == 2
+    assert info.findings[0].name == "UnequalJSON"
+    assert info.findings[0].on == InstanceID("sentry.userrole", 1)
+    assert info.findings[1].name == "UnequalJSON"
+    assert info.findings[1].on == InstanceID("sentry.userroleuser", 1)
+
+
+def test_good_date_updated_comparator():
+    cmp = DateUpdatedComparator("my_date_field")
+    id = InstanceID("test", 1)
+    left = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:00:00.123Z",
+        },
+    }
+    right = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:00:00.123Z",
+        },
+    }
+    assert cmp.compare(id, left, right) is None
+
+
+def test_bad_date_updated_comparator():
+    cmp = DateUpdatedComparator("my_date_field")
+    id = InstanceID("test", 1)
+    left = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:12:34.567Z",
+        },
+    }
+    right = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:00:00.001Z",
+        },
+    }
+    res = cmp.compare(id, left, right)
+    assert res is not None
+    assert res.on == id
+    assert res.name == "DateUpdatedComparator"