Skip to content

Commit

Permalink
fix(backup): Improve DateUpdatedComparator (#53762)
Browse files Browse the repository at this point in the history
This comparator had a (granted, unlikely in practice) bug where it would
crash if only one of the two models had the field in question set. The
new setup tests for field existence before attempting the comparison
logic, and always returns a list of comparators (empty on success,
filled with findings on failure) rather than using the more confusing
union type like before.

Issue: getsentry/team-ospo#157
  • Loading branch information
azaslavsky authored Jul 31, 2023
1 parent 0e7192c commit d6793dd
Show file tree
Hide file tree
Showing 2 changed files with 117 additions and 21 deletions.
73 changes: 56 additions & 17 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ def __init__(self, findings: list[ComparatorFinding]):
def append(self, finding: ComparatorFinding) -> None:
self.findings.append(finding)

def extend(self, findings: list[ComparatorFinding]) -> None:
self.findings += findings

def pretty(self) -> str:
return "\n".join(f.pretty() for f in self.findings)

Expand All @@ -67,8 +70,9 @@ class JSONScrubbingComparator(ABC):
otherwise equivalent JSON instances of the same model.
Each class inheriting from `JSONScrubbingComparator` should override the abstract `compare`
method with its own comparison logic. The `scrub` method is universal (it merely moves the
compared fields from the `fields` dictionary to the non-diffed `scrubbed` dictionary).
method with its own comparison logic. The `scrub` method merely moves the compared fields from
the `fields` dictionary to the non-diffed `scrubbed` dictionary, and may optionally be wrapped
if extra scrubbing logic is necessary.
If multiple comparators are used sequentially on a single model (see the `SCRUBBING_COMPARATORS`
dict below for specific mappings), all of the `compare(...)` methods are called before any of
Expand All @@ -89,20 +93,44 @@ def check(self, side: str, data: JSONData) -> None:
raise RuntimeError(f"The {side} input must have a `fields` dictionary.")

@abstractmethod
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
"""An abstract method signature, to be implemented by inheriting classes with their own
comparison logic. Implementations of this method MUST take care not to mutate the method's
inputs!"""

pass

def existence(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
"""Ensure that all tracked fields on either both models or neither."""

findings = []
for f in self.fields:
if f not in left["fields"] and f not in right["fields"]:
continue
if f not in left["fields"]:
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
reason=f"the left {f} value on `{on}` was missing",
)
)
if f not in right["fields"]:
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
reason=f"the right {f} value on `{on}` was missing",
)
)
return findings

def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
"""Removes all of the fields compared by this comparator from the `fields` dict, so that the
remaining fields may be compared for equality.
Parameters:
- on: An `InstanceID` that must be shared by both versions of the JSON model being
compared.
- on: An `InstanceID` that must be shared by both versions of the JSON model being compared.
- left: One of the models being compared (usually the "before") version.
- right: The other model it is being compared against (usually the "after" or
post-processed version).
Expand Down Expand Up @@ -135,17 +163,23 @@ def __init__(self, field: str):
super().__init__([field])
self.field = field

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
left_date_updated = left["fields"][self.field]
right_date_updated = right["fields"][self.field]
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
f = self.field
if f not in left["fields"] and f not in right["fields"]:
return []

left_date_updated = left["fields"][f]
right_date_updated = right["fields"][f]
if parser.parse(left_date_updated) > parser.parse(right_date_updated):
return ComparatorFinding(
kind=self.get_kind(),
on=on,
reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not less
than or equal to the right ({right_date_updated})""",
)
return None
return [
ComparatorFinding(
kind=self.get_kind(),
on=on,
reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not
less than or equal to the right ({right_date_updated})""",
)
]
return []


ComparatorList = List[JSONScrubbingComparator]
Expand Down Expand Up @@ -211,15 +245,20 @@ def json_lines(obj: JSONData) -> list[str]:
# Try comparators applicable for this specific model.
if id.model in comparators:
# We take care to run ALL of the `compare()` methods on each comparator before calling
# any `scrub()` methods. This ensures tha, in cases where a single model uses multiple
# any `scrub()` methods. This ensures that, in cases where a single model uses multiple
# comparators that touch the same fields, one comparator does not accidentally scrub the
# inputs for its follower. If `compare()` functions are well-behaved (that is, they
# don't mutate their inputs), this should be sufficient to ensure that the order in
# which comparators are applied does not change the final output.
for cmp in comparators[id.model]:
ex = cmp.existence(id, exp, act)
if ex:
findings.extend(ex)
continue

res = cmp.compare(id, exp, act)
if res:
findings.append(res)
findings.extend(res)
for cmp in comparators[id.model]:
cmp.scrub(id, exp, act)

Expand Down
65 changes: 61 additions & 4 deletions tests/sentry/backup/test_comparators.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,62 @@
from sentry.runner.commands.backup import DateUpdatedComparator, InstanceID


def test_good_comparator_both_sides_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
present = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:12:34.567Z",
},
}
assert not cmp.existence(id, present, present)


def test_good_comparator_neither_side_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
missing = {
"model": "test",
"pk": 1,
"fields": {},
}
assert not cmp.existence(id, missing, missing)


def test_bad_comparator_only_one_side_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
present = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:12:34.567Z",
},
}
missing = {
"model": "test",
"pk": 1,
"fields": {},
}
res = cmp.existence(id, missing, present)
assert res
assert res[0]
assert res[0].on == id
assert res[0].kind == "DateUpdatedComparator"
assert "left" in res[0].reason
assert "my_date_field" in res[0].reason

res = cmp.existence(id, present, missing)
assert res
assert res[0]
assert res[0].on == id
assert res[0].kind == "DateUpdatedComparator"
assert "right" in res[0].reason
assert "my_date_field" in res[0].reason


def test_good_date_updated_comparator():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
Expand All @@ -18,7 +74,7 @@ def test_good_date_updated_comparator():
"my_date_field": "2023-06-22T23:00:00.123Z",
},
}
assert cmp.compare(id, left, right) is None
assert not cmp.compare(id, left, right)


def test_bad_date_updated_comparator():
Expand All @@ -39,6 +95,7 @@ def test_bad_date_updated_comparator():
},
}
res = cmp.compare(id, left, right)
assert res is not None
assert res.on == id
assert res.kind == "DateUpdatedComparator"
assert res
assert res[0]
assert res[0].on == id
assert res[0].kind == "DateUpdatedComparator"

0 comments on commit d6793dd

Please sign in to comment.