Skip to content

Commit

Permalink
feat(backup): Support foreign key remapping (#54610)
Browse files Browse the repository at this point in the history
When importing into a database that is not entirely flushed, sequences
and all, we run into a problem: the foriegn keys in the source JSON file
will not match the primary keys of the newly `INSERT`ed models we will
be importing. This will cause downstream imports that depend on upstream
imports to fail.

To resolve this, we maintain a `PrimaryKeyMap` of old pks to new pks for
every model. As we import models, we take note of their new pks, so that
when foreign key references to these models are encountered later on, we
can perform a simple replacement.

This generally works well enough, but because we have a circular
dependency between `Actor` and `Team`, we must take care to do the
appropriate set of dance moves to avoid writing `Actor`s with
(necessarily) non-existent `Team` references.

To test these changes, I've modified all of `test_models` to *not* reset
sequences between database uploads. This should ensure that every such
test will produce two JSON files with differing pks, which should give
us fairly thorough coverage.

Issue: getsentry/team-ospo#170
Issue: getsentry/team-ospo#171
  • Loading branch information
azaslavsky authored Aug 15, 2023
1 parent 7518d18 commit 11011d4
Show file tree
Hide file tree
Showing 8 changed files with 230 additions and 38 deletions.
65 changes: 65 additions & 0 deletions fixtures/backup/user-pk-mapping.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
[
{
"model": "sentry.email",
"pk": 34,
"fields": {
"email": "[email protected]",
"date_added": "2023-06-22T00:00:00.123Z"
}
},
{
"model": "sentry.user",
"pk": 12,
"fields": {
"password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=",
"last_login": null,
"username": "[email protected]",
"name": "",
"email": "[email protected]",
"is_staff": true,
"is_active": true,
"is_superuser": true,
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
"date_joined": "2023-06-22T22:59:55.488Z",
"last_active": "2023-06-22T22:59:55.489Z",
"avatar_type": 0,
"avatar_url": null
}
},
{
"model": "sentry.useremail",
"pk": 56,
"fields": {
"user": 12,
"email": "[email protected]",
"validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE",
"date_hash_added": "2023-06-22T00:00:00.456Z",
"is_verified": false
}
},
{
"model": "sentry.userrole",
"pk": 78,
"fields": {
"date_updated": "2023-06-22T23:00:00.123Z",
"date_added": "2023-06-22T22:54:27.960Z",
"name": "Super Admin",
"permissions": "['broadcasts.admin', 'users.admin', 'options.admin']"
}
},
{
"model": "sentry.userroleuser",
"pk": 90,
"fields": {
"date_updated": "2023-06-22T23:00:00.123Z",
"date_added": "2023-06-22T22:59:57.000Z",
"user": 12,
"role": 78
}
}
]
11 changes: 8 additions & 3 deletions src/sentry/backup/comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Callable, Dict, List
from typing import Callable, Dict, List, Type

from dateutil import parser
from django.db import models
Expand All @@ -11,6 +11,8 @@
from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID
from sentry.backup.helpers import Side, get_exportable_final_derivations_of
from sentry.db.models import BaseModel
from sentry.models.team import Team
from sentry.models.user import User
from sentry.utils.json import JSONData


Expand Down Expand Up @@ -209,7 +211,7 @@ class ForeignKeyComparator(JSONScrubbingComparator):
left_pk_map: PrimaryKeyMap | None = None
right_pk_map: PrimaryKeyMap | None = None

def __init__(self, foreign_fields: dict[str, models.base.ModelBase]):
def __init__(self, foreign_fields: dict[str, Type[models.base.Model]]):
super().__init__(*(foreign_fields.keys()))
self.foreign_fields = foreign_fields

Expand All @@ -223,7 +225,8 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
findings = []
fields = sorted(self.fields)
for f in fields:
field_model_name = "sentry." + self.foreign_fields[f].__name__.lower()
obj_name = self.foreign_fields[f]._meta.object_name.lower() # type: ignore[union-attr]
field_model_name = "sentry." + obj_name
if left["fields"].get(f) is None and right["fields"].get(f) is None:
continue

Expand Down Expand Up @@ -429,6 +432,8 @@ def build_default_comparators():
comparators: ComparatorMap = defaultdict(
list,
{
# TODO(hybrid-cloud): actor refactor. Remove this entry when done.
"sentry.actor": [ForeignKeyComparator({"team": Team, "user_id": User})],
"sentry.apitoken": [HashObfuscatingComparator("refresh_token", "token")],
"sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")],
"sentry.authidentity": [HashObfuscatingComparator("ident", "token")],
Expand Down
19 changes: 11 additions & 8 deletions src/sentry/backup/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from collections import defaultdict
from enum import Enum, auto, unique
from typing import NamedTuple
from typing import NamedTuple, Type

from django.db import models
from django.db.models.fields.related import ForeignKey, OneToOneField
Expand Down Expand Up @@ -35,24 +35,24 @@ class ForeignFieldKind(Enum):
class ForeignField(NamedTuple):
"""A field that creates a dependency on another Sentry model."""

model: models.base.ModelBase
model: Type[models.base.Model]
kind: ForeignFieldKind


class ModelRelations(NamedTuple):
"""What other models does this model depend on, and how?"""

model: models.base.ModelBase
model: Type[models.base.Model]
foreign_keys: dict[str, ForeignField]
silos: list[SiloMode]

def flatten(self) -> set[models.base.ModelBase]:
def flatten(self) -> set[Type[models.base.Model]]:
"""Returns a flat list of all related models, omitting the kind of relation they have."""

return {ff.model for ff in self.foreign_keys.values()}


def normalize_model_name(model):
def normalize_model_name(model: Type[models.base.Model]):
return f"{model._meta.app_label}.{model._meta.object_name}"


Expand All @@ -61,8 +61,11 @@ class DependenciesJSONEncoder(json.JSONEncoder):
`ModelRelations`."""

def default(self, obj):
if isinstance(obj, models.base.ModelBase):
return normalize_model_name(obj)
if isinstance(obj, models.base.Model):
return normalize_model_name(type(obj))
if meta := getattr(obj, "_meta", None):
# Note: done to accommodate `node.Nodestore`.
return f"{meta.app_label}.{meta.object_name}"
if isinstance(obj, ForeignFieldKind):
return obj.name
if isinstance(obj, SiloMode):
Expand Down Expand Up @@ -226,7 +229,7 @@ def sorted_dependencies():
"Can't resolve dependencies for %s in serialized app list."
% ", ".join(
normalize_model_name(m.model)
for m in sorted(skipped, key=lambda obj: normalize_model_name(obj))
for m in sorted(skipped, key=lambda mr: normalize_model_name(mr.model))
)
)
model_dependencies_list = skipped
Expand Down
69 changes: 67 additions & 2 deletions src/sentry/backup/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from django.apps import apps
from django.core import management, serializers
from django.db import IntegrityError, connection, transaction
from django.forms import model_to_dict

from sentry.backup.dependencies import PrimaryKeyMap, dependencies, normalize_model_name
from sentry.backup.helpers import EXCLUDED_APPS


Expand All @@ -27,16 +29,79 @@ class OldImportConfig(NamedTuple):


def imports(src, old_config: OldImportConfig, printer=click.echo):
"""CLI command wrapping the `exec_import` functionality."""
"""Imports core data for the Sentry installation."""

# TODO(hybrid-cloud): actor refactor. Remove this import when done.
from sentry.models.actor import Actor

try:
# Import / export only works in monolith mode with a consolidated db.
with transaction.atomic("default"):
pk_map = PrimaryKeyMap()
deps = dependencies()

for obj in serializers.deserialize(
"json", src, stream=True, use_natural_keys=old_config.use_natural_foreign_keys
):
if obj.object._meta.app_label not in EXCLUDED_APPS:
obj.save()
# TODO(getsentry/team-ospo#183): This conditional should be removed once we want
# to roll out the new API to self-hosted.
if old_config.use_update_instead_of_create:
obj.save()
else:
o = obj.object
label = o._meta.label_lower
model_name = normalize_model_name(o)
for field, model_relation in deps[model_name].foreign_keys.items():
field_id = f"{field}_id"
fk = getattr(o, field_id, None)
if fk is not None:
new_pk = pk_map.get(normalize_model_name(model_relation.model), fk)
# TODO(getsentry/team-ospo#167): Will allow missing items when we
# implement org-based filtering.
setattr(o, field_id, new_pk)

old_pk = o.pk
o.pk = None
o.id = None

# TODO(hybrid-cloud): actor refactor. Remove this conditional when done.
#
# `Actor` and `Team` have a direct circular dependency between them for the
# time being due to an ongoing refactor (that is, `Actor` foreign keys
# directly into `Team`, and `Team` foreign keys directly into `Actor`). If
# we use `INSERT` database calls naively, they will always fail, because one
# half of the cycle will always be missing.
#
# Because `Actor` ends up first in the dependency sorting (see:
# fixtures/backup/model_dependencies/sorted.json), a viable solution here is
# to always null out the `team_id` field of the `Actor` when we write it,
# and then make sure to circle back and update the relevant actor after we
# create the `Team` models later on (see snippet at the end of this scope).
if label == "sentry.actor":
o.team_id = None

# TODO(getsentry/team-ospo#181): what's up with email/useremail here? Seems
# like both gets added with `sentry.user` simultaneously? Will need to make
# more robust user handling logic, and to test what happens when a UserEmail
# already exists.
if label == "sentry.useremail":
(o, _) = o.__class__.objects.get_or_create(
user=o.user, email=o.email, defaults=model_to_dict(o)
)
pk_map.insert(model_name, old_pk, o.pk)
continue

obj.save(force_insert=True)
pk_map.insert(model_name, old_pk, o.pk)

# TODO(hybrid-cloud): actor refactor. Remove this conditional when done.
if label == "sentry.team":
if o.actor_id is not None:
actor = Actor.objects.get(pk=o.actor_id)
actor.team_id = o.pk
actor.save()

# For all database integrity errors, let's warn users to follow our
# recommended backup/restore workflow before reraising exception. Most of
# these errors come from restoring on a different version of Sentry or not restoring
Expand Down
15 changes: 10 additions & 5 deletions src/sentry/backup/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,19 +129,24 @@ def json_lines(obj: JSONData) -> list[str]:
left_pk_map = PrimaryKeyMap()
right_pk_map = PrimaryKeyMap()

# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
# outputs.
# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
# model that we encounter later.
for id, right in right_models.items():
if id.ordinal is None:
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")

# Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this
# model that we encounter later.
left = left_models[id]
left_pk_map.insert(id.model, left["pk"], id.ordinal)
left_pk_map.insert(id.model, left_models[id]["pk"], id.ordinal)
right_pk_map.insert(id.model, right["pk"], id.ordinal)

# We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both
# outputs.
for id, right in right_models.items():
if id.ordinal is None:
raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set")

# Try comparators applicable for this specific model.
left = left_models[id]
if id.model in comparators:
# We take care to run ALL of the `compare()` methods on each comparator before calling
# any `scrub()` methods. This ensures that, in cases where a single model uses multiple
Expand Down
46 changes: 37 additions & 9 deletions src/sentry/testutils/helpers/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,27 @@
import tempfile
from pathlib import Path

from django.apps import apps
from django.core.management import call_command
from django.db import connections, router, transaction

from sentry.backup.comparators import ComparatorMap
from sentry.backup.dependencies import sorted_dependencies
from sentry.backup.exports import OldExportConfig, exports
from sentry.backup.findings import ComparatorFindings
from sentry.backup.helpers import get_exportable_final_derivations_of, get_final_derivations_of
from sentry.backup.imports import OldImportConfig, imports
from sentry.backup.validate import validate
from sentry.models.integrations.sentry_app import SentryApp
from sentry.silo import unguarded_write
from sentry.testutils.factories import get_fixture_path
from sentry.utils import json
from sentry.utils.json import JSONData

__all__ = [
"ValidationError",
"export_to_file",
"get_final_derivations_of",
"get_exportable_final_derivations_of",
"import_export_then_validate",
"import_export_from_fixture_then_validate",
"ValidationError",
]

NOOP_PRINTER = lambda *args, **kwargs: None
Expand All @@ -46,7 +47,30 @@ def export_to_file(path: Path) -> JSONData:
return output


def import_export_then_validate(method_name: str) -> JSONData:
REVERSED_DEPENDENCIES = sorted_dependencies()
REVERSED_DEPENDENCIES.reverse()


def clear_database_but_keep_sequences():
"""Deletes all models we care about from the database, in a sequence that ensures we get no
foreign key errors."""

with unguarded_write(using="default"), transaction.atomic(using="default"):
for model in REVERSED_DEPENDENCIES:
# For some reason, the tables for `SentryApp*` models don't get deleted properly here
# when using `model.objects.all().delete()`, so we have to call out to Postgres
# manually.
connection = connections[router.db_for_write(SentryApp)]
with connection.cursor() as cursor:
table = model._meta.db_table
cursor.execute(f"DELETE FROM {table:s};")

# Clear remaining tables that are not explicitly in Sentry's own model dependency graph.
for model in set(apps.get_models()) - set(REVERSED_DEPENDENCIES):
model.objects.all().delete()


def import_export_then_validate(method_name: str, *, reset_pks: bool = True) -> JSONData:
"""Test helper that validates that dat imported from an export of the current state of the test
database correctly matches the actual outputted export data."""

Expand All @@ -60,10 +84,14 @@ def import_export_then_validate(method_name: str) -> JSONData:

# Write the contents of the "expected" JSON file into the now clean database.
# TODO(Hybrid-Cloud): Review whether this is the correct route to apply in this case.
with unguarded_write(using="default"), open(tmp_expect) as tmp_file:
# Reset the Django database.
call_command("flush", verbosity=0, interactive=False)
imports(tmp_file, OldImportConfig(), NOOP_PRINTER)
with unguarded_write(using="default"):
if reset_pks:
call_command("flush", verbosity=0, interactive=False)
else:
clear_database_but_keep_sequences()

with open(tmp_expect) as tmp_file:
imports(tmp_file, OldImportConfig(), NOOP_PRINTER)

# Validate that the "expected" and "actual" JSON matches.
actual = export_to_file(tmp_actual)
Expand Down
Loading

0 comments on commit 11011d4

Please sign in to comment.