fix(backup): Improve DateUpdatedComparator

This comparator had a (granted, unlikely in practice) bug where it would crash if only one of the two models had the field in question set. The new setup tests for field existence before attempting the comparison logic, and always returns a list of comparators (empty on success, filled with findings on failure) rather than using the more confusing union type like before. Issue: getsentry/team-ospo#157
getsentry · Jul 31, 2023 · 295a3bc · 295a3bc
1 parent bb4e8da
commit 295a3bc
Show file tree

Hide file tree

Showing 2 changed files with 117 additions and 21 deletions.
diff --git a/src/sentry/runner/commands/backup.py b/src/sentry/runner/commands/backup.py
@@ -57,6 +57,9 @@ def __init__(self, findings: list[ComparatorFinding]):
     def append(self, finding: ComparatorFinding) -> None:
         self.findings.append(finding)
 
+    def extend(self, findings: list[ComparatorFinding]) -> None:
+        self.findings += findings
+
     def pretty(self) -> str:
         return "\n".join(f.pretty() for f in self.findings)
 
@@ -67,8 +70,9 @@ class JSONScrubbingComparator(ABC):
     otherwise equivalent JSON instances of the same model.
 
     Each class inheriting from `JSONScrubbingComparator` should override the abstract `compare`
-    method with its own comparison logic. The `scrub` method is universal (it merely moves the
-    compared fields from the `fields` dictionary to the non-diffed `scrubbed` dictionary).
+    method with its own comparison logic. The `scrub` method merely moves the compared fields from
+    the `fields` dictionary to the non-diffed `scrubbed` dictionary, and may optionally be wrapped
+    if extra scrubbing logic is necessary.
 
     If multiple comparators are used sequentially on a single model (see the `SCRUBBING_COMPARATORS`
     dict below for specific mappings), all of the `compare(...)` methods are called before any of
@@ -89,20 +93,44 @@ def check(self, side: str, data: JSONData) -> None:
             raise RuntimeError(f"The {side} input must have a `fields` dictionary.")
 
     @abstractmethod
-    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
+    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
         """An abstract method signature, to be implemented by inheriting classes with their own
         comparison logic. Implementations of this method MUST take care not to mutate the method's
         inputs!"""
 
         pass
 
+    def existence(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
+        """Ensure that all tracked fields on either both models or neither."""
+
+        findings = []
+        for f in self.fields:
+            if f not in left["fields"] and f not in right["fields"]:
+                continue
+            if f not in left["fields"]:
+                findings.append(
+                    ComparatorFinding(
+                        kind=self.get_kind(),
+                        on=on,
+                        reason=f"the left {f} value on `{on}` was missing",
+                    )
+                )
+            if f not in right["fields"]:
+                findings.append(
+                    ComparatorFinding(
+                        kind=self.get_kind(),
+                        on=on,
+                        reason=f"the right {f} value on `{on}` was missing",
+                    )
+                )
+        return findings
+
     def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
         """Removes all of the fields compared by this comparator from the `fields` dict, so that the
         remaining fields may be compared for equality.
 
         Parameters:
-        - on: An `InstanceID` that must be shared by both versions of the JSON model being
-            compared.
+        - on: An `InstanceID` that must be shared by both versions of the JSON model being compared.
         - left: One of the models being compared (usually the "before") version.
         - right: The other model it is being compared against (usually the "after" or
             post-processed version).
@@ -135,17 +163,23 @@ def __init__(self, field: str):
         super().__init__([field])
         self.field = field
 
-    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
-        left_date_updated = left["fields"][self.field]
-        right_date_updated = right["fields"][self.field]
+    def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
+        f = self.field
+        if f not in left["fields"] and f not in right["fields"]:
+            return []
+
+        left_date_updated = left["fields"][f]
+        right_date_updated = right["fields"][f]
         if parser.parse(left_date_updated) > parser.parse(right_date_updated):
-            return ComparatorFinding(
-                kind=self.get_kind(),
-                on=on,
-                reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not less
-                than or equal to the right ({right_date_updated})""",
-            )
-        return None
+            return [
+                ComparatorFinding(
+                    kind=self.get_kind(),
+                    on=on,
+                    reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not
+                            less than or equal to the right ({right_date_updated})""",
+                )
+            ]
+        return []
 
 
 ComparatorList = List[JSONScrubbingComparator]
@@ -211,15 +245,20 @@ def json_lines(obj: JSONData) -> list[str]:
         # Try comparators applicable for this specific model.
         if id.model in comparators:
             # We take care to run ALL of the `compare()` methods on each comparator before calling
-            # any `scrub()` methods. This ensures tha, in cases where a single model uses multiple
+            # any `scrub()` methods. This ensures that, in cases where a single model uses multiple
             # comparators that touch the same fields, one comparator does not accidentally scrub the
             # inputs for its follower. If `compare()` functions are well-behaved (that is, they
             # don't mutate their inputs), this should be sufficient to ensure that the order in
             # which comparators are applied does not change the final output.
             for cmp in comparators[id.model]:
+                ex = cmp.existence(id, exp, act)
+                if ex:
+                    findings.extend(ex)
+                    continue
+
                 res = cmp.compare(id, exp, act)
                 if res:
-                    findings.append(res)
+                    findings.extend(res)
             for cmp in comparators[id.model]:
                 cmp.scrub(id, exp, act)
 

diff --git a/tests/sentry/backup/test_comparators.py b/tests/sentry/backup/test_comparators.py
@@ -1,6 +1,62 @@
 from sentry.runner.commands.backup import DateUpdatedComparator, InstanceID
 
 
+def test_good_comparator_both_sides_existing():
+    cmp = DateUpdatedComparator("my_date_field")
+    id = InstanceID("test", 1)
+    present = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:12:34.567Z",
+        },
+    }
+    assert not cmp.existence(id, present, present)
+
+
+def test_good_comparator_neither_side_existing():
+    cmp = DateUpdatedComparator("my_date_field")
+    id = InstanceID("test", 1)
+    missing = {
+        "model": "test",
+        "pk": 1,
+        "fields": {},
+    }
+    assert not cmp.existence(id, missing, missing)
+
+
+def test_bad_comparator_only_one_side_existing():
+    cmp = DateUpdatedComparator("my_date_field")
+    id = InstanceID("test", 1)
+    present = {
+        "model": "test",
+        "pk": 1,
+        "fields": {
+            "my_date_field": "2023-06-22T23:12:34.567Z",
+        },
+    }
+    missing = {
+        "model": "test",
+        "pk": 1,
+        "fields": {},
+    }
+    res = cmp.existence(id, missing, present)
+    assert res
+    assert res[0]
+    assert res[0].on == id
+    assert res[0].kind == "DateUpdatedComparator"
+    assert "left" in res[0].reason
+    assert "my_date_field" in res[0].reason
+
+    res = cmp.existence(id, present, missing)
+    assert res
+    assert res[0]
+    assert res[0].on == id
+    assert res[0].kind == "DateUpdatedComparator"
+    assert "right" in res[0].reason
+    assert "my_date_field" in res[0].reason
+
+
 def test_good_date_updated_comparator():
     cmp = DateUpdatedComparator("my_date_field")
     id = InstanceID("test", 1)
@@ -18,7 +74,7 @@ def test_good_date_updated_comparator():
             "my_date_field": "2023-06-22T23:00:00.123Z",
         },
     }
-    assert cmp.compare(id, left, right) is None
+    assert not cmp.compare(id, left, right)
 
 
 def test_bad_date_updated_comparator():
@@ -39,6 +95,7 @@ def test_bad_date_updated_comparator():
         },
     }
     res = cmp.compare(id, left, right)
-    assert res is not None
-    assert res.on == id
-    assert res.kind == "DateUpdatedComparator"
+    assert res
+    assert res[0]
+    assert res[0].on == id
+    assert res[0].kind == "DateUpdatedComparator"