Skip to content

Commit

Permalink
feat(backup): Support comparators
Browse files Browse the repository at this point in the history
This change introduces the idea of comparators, or special handlers for
comparing JSON fields on exported models that cannot be tested for
simple stringh equivalency. The first of these comparators, the
`DateUpdatedComparator`, is added to ensure that comparisons on models
that have an audit timetamp incremented every time they are mutated are
still able to succeed.
  • Loading branch information
azaslavsky committed Jul 14, 2023
1 parent 4e5eebd commit 343704b
Show file tree
Hide file tree
Showing 2 changed files with 186 additions and 19 deletions.
113 changes: 103 additions & 10 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from datetime import datetime, timedelta, timezone
from difflib import unified_diff
from io import StringIO
from typing import NamedTuple, NewType
from typing import Dict, List, NamedTuple, NewType

import click
from dateutil import parser
from django.apps import apps
from django.core import management, serializers
from django.core.serializers import serialize
Expand All @@ -20,15 +22,14 @@
default=better_default_encoder, indent=2, ignore_nan=True, sort_keys=True
)

ComparatorName = NewType("ComparatorName", str)
ModelName = NewType("ModelName", str)
ComparatorSlug = NewType("ComparatorSlug", str)


# TODO(team-ospo/#155): Figure out if we are going to use `pk` as part of the identifier, or some other kind of sequence number internal to the JSON export instead.
class InstanceID(NamedTuple):
"""Every entry in the generated backup JSON file should have a unique model+pk combination, which serves as its identifier."""

model: ModelName
model: str
pk: int

def pretty(self) -> str:
Expand All @@ -38,7 +39,7 @@ def pretty(self) -> str:
class ComparatorFinding(NamedTuple):
"""Store all information about a single failed matching between expected and actual output."""

name: ComparatorName
name: ComparatorSlug
on: InstanceID
reason: str = ""

Expand All @@ -59,7 +60,90 @@ def pretty(self) -> str:
return "\n".join(f.pretty() for f in self.findings)


def validate(expect: JSONData, actual: JSONData) -> ComparatorFindings:
class JSONScrubbingComparator(ABC):
"""An abstract class that compares and then scrubs some set of fields that, by a more nuanced
definition that mere strict byte-for-byte equality, are expected to maintain some relation on
otherwise equivalent JSON instances of the same model.
Each class inheriting from `JSONScrubbingComparator` should override the abstract `compare`
method with its own comparison logic. The `scrub` method is universal (it merely moves the
compared fields from the `fields` dictionary to the non-diffed `scrubbed` dictionary).
If multiple comparators are used sequentially on a single model (see the `SCRUBBING_COMPARATORS`
dict below for specific mappings), all of the `compare(...)` methods are called before any of
the `scrub(...)` methods are. This ensures that comparators that touch the same fields do not
have their inputs mangled by one another."""

def __init__(self, fields: list[str]):
self.fields = fields

def check(self, side: str, data: JSONData) -> None:
"""Ensure that we have received valid JSON data at runtime."""

if "model" not in data or not isinstance(data["model"], str):
raise RuntimeError(f"The {side} input must have a `model` string assigned to it.")
if "pk" not in data or not isinstance(data["pk"], int):
raise RuntimeError(f"The {side} input must have a numerical `pk` entry.")
if "fields" not in data or not isinstance(data["fields"], dict):
raise RuntimeError(f"The {side} input must have a `fields` dictionary.")

@abstractmethod
def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
"""An abstract method signature, to be implemented by inhering classes with their own comparison logic."""

pass

def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
"""Removes all of the fields compared by this comparator from the `fields` dict, so that the remaining fields may be compared for equality."""

self.check("left", left)
self.check("right", right)
if "scrubbed" not in left:
left["scrubbed"] = {}
if "scrubbed" not in right:
right["scrubbed"] = {}
for field in self.fields:
del left["fields"][field]
left["scrubbed"][f"{self.slug()}::{field}"] = True
del right["fields"][field]
right["scrubbed"][f"{self.slug()}::{field}"] = True

def slug(self) -> ComparatorSlug:
"""A unique identifier for this particular derivation of JSONScrubbingComparator, which will be bubbled up in ComparatorFindings when they are generated."""

return self.__class__.__name__


class DateUpdatedComparator(JSONScrubbingComparator):
"""Comparator that ensures that the specified field's value on the right input is an ISO-8601 date that is greater than (ie, occurs after) the specified field's left input."""

def __init__(self, field: str):
super().__init__([field])
self.field = field

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> ComparatorFinding | None:
left_date_updated = left["fields"][self.field]
right_date_updated = right["fields"][self.field]
if parser.parse(left_date_updated) > parser.parse(right_date_updated):
return ComparatorFinding(
name=self.slug(),
on=on,
reason=f"""the left date_updated value on `{on}` ({left_date_updated}) was not less
than or equal to the right ({right_date_updated})""",
)


ComparatorList = List[JSONScrubbingComparator]
ComparatorMap = Dict[str, ComparatorList]
DEFAULT_COMPARATORS: ComparatorMap = {
"sentry.userrole": [DateUpdatedComparator("date_updated")],
"sentry.userroleuser": [DateUpdatedComparator("date_updated")],
}


def validate(
expect: JSONData, actual: JSONData, comparators: ComparatorMap = DEFAULT_COMPARATORS
) -> ComparatorFindings:
"""Ensures that originally imported data correctly matches actual outputted data, and produces a list of reasons why not when it doesn't"""

def json_lines(obj: JSONData) -> list[str]:
Expand All @@ -78,7 +162,7 @@ def json_lines(obj: JSONData) -> list[str]:
for model in actual:
id = InstanceID(model["model"], model["pk"])
if id in act_models:
findings.append(ComparatorFinding("duplicate_entry", id))
findings.append(ComparatorFinding("DuplicateEntry", id))
else:
act_models[id] = model

Expand All @@ -87,20 +171,29 @@ def json_lines(obj: JSONData) -> list[str]:
missing = sorted(exp_models.keys() - act_models.keys())
for id in extra:
del act_models[id]
findings.append(ComparatorFinding("unexpected_entry", id))
findings.append(ComparatorFinding("UnexpectedEntry", id))
for id in missing:
del exp_models[id]
findings.append(ComparatorFinding("missing_entry", id))
findings.append(ComparatorFinding("MissingEntry", id))

# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
# outputs.
for id, act in act_models.items():
exp = exp_models[id]

# Try comparators applicable for this specific model.
if id.model in comparators:
for cmp in comparators[id.model]:
res = cmp.compare(id, exp, act)
if res:
findings.append(ComparatorFinding(cmp.slug(), id, res))
else:
cmp.scrub(id, exp, act)

# Finally, perform a diff on the remaining JSON.
diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
if diff:
findings.append(ComparatorFinding("json_diff", id, "\n " + "\n ".join(diff)))
findings.append(ComparatorFinding("UnequalJSON", id, "\n " + "\n ".join(diff)))

return findings

Expand Down
92 changes: 83 additions & 9 deletions tests/sentry/backup/test_correctness.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,39 @@

import pytest
from click.testing import CliRunner
from freezegun import freeze_time

from sentry.runner.commands.backup import import_, validate
from sentry.runner.commands.backup import (
DEFAULT_COMPARATORS,
ComparatorMap,
DateUpdatedComparator,
InstanceID,
import_,
validate,
)
from sentry.silo import unguarded_write
from sentry.testutils.factories import get_fixture_path
from sentry.utils import json
from sentry.utils.pytest.fixtures import django_db_all
from tests.sentry.backup import ValidationError, tmp_export_to_file


def import_export_then_validate(tmp_path: Path, fixture_file_name: str) -> None:
def django_db_complete_reset(func=None):
"""Pytest decorator for resetting all databases, including pk sequences"""

if func is not None:
return pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")(
func
)

def decorator(function):
return pytest.mark.django_db(transaction=True, reset_sequences=True, databases="__all__")(
function
)

return decorator


def import_export_then_validate(tmp_path: Path, fixture_file_name: str, map: ComparatorMap) -> None:
"""Test helper that validates that data imported from a fixture `.json` file correctly matches
the actual outputted export data."""

Expand All @@ -24,24 +46,76 @@ def import_export_then_validate(tmp_path: Path, fixture_file_name: str) -> None:
rv = CliRunner().invoke(import_, [str(fixture_file_path)])
assert rv.exit_code == 0, rv.output

res = validate(expect, tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")))
res = validate(
expect,
tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")),
map,
)
if res.findings:
raise ValidationError(res)


@django_db_all(transaction=True, reset_sequences=True)
@freeze_time("2023-06-22T23:00:00.123Z")
def test_good_fresh_install_validation(tmp_path):
import_export_then_validate(tmp_path, "fresh-install.json")
import_export_then_validate(tmp_path, "fresh-install.json", DEFAULT_COMPARATORS)


@django_db_all(transaction=True, reset_sequences=True)
def test_bad_fresh_install_validation(tmp_path):

with pytest.raises(ValidationError) as excinfo:
import_export_then_validate(tmp_path, "fresh-install.json")
assert len(excinfo.value.info.findings) == 2
import_export_then_validate(tmp_path, "fresh-install.json", {})
info = excinfo.value.info
assert len(info.findings) == 2
assert info.findings[0].name == "UnequalJSON"
assert info.findings[0].on == InstanceID("sentry.userrole", 1)
assert info.findings[1].name == "UnequalJSON"
assert info.findings[1].on == InstanceID("sentry.userroleuser", 1)


@django_db_all(transaction=True, reset_sequences=True)
def test_datetime_formatting(tmp_path):
import_export_then_validate(tmp_path, "datetime-formatting.json")
import_export_then_validate(tmp_path, "datetime-formatting.json", {})


def test_good_date_updated_comparator():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
left = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:00:00.123Z",
},
}
right = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:00:00.123Z",
},
}
assert cmp.compare(id, left, right) is None


def test_bad_date_updated_comparator():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
left = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:12:34.567Z",
},
}
right = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:00:00.001Z",
},
}
res = cmp.compare(id, left, right)
assert res is not None
assert res.on == id
assert res.name == "DateUpdatedComparator"

0 comments on commit 343704b

Please sign in to comment.