diff --git a/fixtures/backup/user-pk-mapping.json b/fixtures/backup/user-pk-mapping.json new file mode 100644 index 00000000000000..5f7b7d3f62d067 --- /dev/null +++ b/fixtures/backup/user-pk-mapping.json @@ -0,0 +1,65 @@ +[ + { + "model": "sentry.email", + "pk": 34, + "fields": { + "email": "testing@example.com", + "date_added": "2023-06-22T00:00:00.123Z" + } + }, + { + "model": "sentry.user", + "pk": 12, + "fields": { + "password": "pbkdf2_sha256$150000$iEvdIknqYjTr$+QsGn0tfIJ1FZLxQI37mVU1gL2KbL/wqjMtG/dFhsMA=", + "last_login": null, + "username": "testing@example.com", + "name": "", + "email": "testing@example.com", + "is_staff": true, + "is_active": true, + "is_superuser": true, + "is_managed": false, + "is_sentry_app": null, + "is_password_expired": false, + "last_password_change": "2023-06-22T22:59:57.023Z", + "flags": "0", + "session_nonce": null, + "date_joined": "2023-06-22T22:59:55.488Z", + "last_active": "2023-06-22T22:59:55.489Z", + "avatar_type": 0, + "avatar_url": null + } + }, + { + "model": "sentry.useremail", + "pk": 56, + "fields": { + "user": 12, + "email": "testing@example.com", + "validation_hash": "mCnWesSVvYQcq7qXQ36AZHwosAd6cghE", + "date_hash_added": "2023-06-22T00:00:00.456Z", + "is_verified": false + } + }, + { + "model": "sentry.userrole", + "pk": 78, + "fields": { + "date_updated": "2023-06-22T23:00:00.123Z", + "date_added": "2023-06-22T22:54:27.960Z", + "name": "Super Admin", + "permissions": "['broadcasts.admin', 'users.admin', 'options.admin']" + } + }, + { + "model": "sentry.userroleuser", + "pk": 90, + "fields": { + "date_updated": "2023-06-22T23:00:00.123Z", + "date_added": "2023-06-22T22:59:57.000Z", + "user": 12, + "role": 78 + } + } + ] diff --git a/src/sentry/backup/comparators.py b/src/sentry/backup/comparators.py index d3e00668bfc432..ebed08d9af2cbf 100644 --- a/src/sentry/backup/comparators.py +++ b/src/sentry/backup/comparators.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from collections import defaultdict -from typing import Callable, Dict, List +from typing import Callable, Dict, List, Type from dateutil import parser from django.db import models @@ -11,6 +11,8 @@ from sentry.backup.findings import ComparatorFinding, ComparatorFindingKind, InstanceID from sentry.backup.helpers import Side, get_exportable_final_derivations_of from sentry.db.models import BaseModel +from sentry.models.team import Team +from sentry.models.user import User from sentry.utils.json import JSONData @@ -209,7 +211,7 @@ class ForeignKeyComparator(JSONScrubbingComparator): left_pk_map: PrimaryKeyMap | None = None right_pk_map: PrimaryKeyMap | None = None - def __init__(self, foreign_fields: dict[str, models.base.ModelBase]): + def __init__(self, foreign_fields: dict[str, Type[models.base.Model]]): super().__init__(*(foreign_fields.keys())) self.foreign_fields = foreign_fields @@ -223,7 +225,8 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa findings = [] fields = sorted(self.fields) for f in fields: - field_model_name = "sentry." + self.foreign_fields[f].__name__.lower() + obj_name = self.foreign_fields[f]._meta.object_name.lower() # type: ignore[union-attr] + field_model_name = "sentry." + obj_name if left["fields"].get(f) is None and right["fields"].get(f) is None: continue @@ -429,6 +432,8 @@ def build_default_comparators(): comparators: ComparatorMap = defaultdict( list, { + # TODO(hybrid-cloud): actor refactor. Remove this entry when done. + "sentry.actor": [ForeignKeyComparator({"team": Team, "user_id": User})], "sentry.apitoken": [HashObfuscatingComparator("refresh_token", "token")], "sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")], "sentry.authidentity": [HashObfuscatingComparator("ident", "token")], diff --git a/src/sentry/backup/dependencies.py b/src/sentry/backup/dependencies.py index fe8e312ded627d..b1fc9c452e978a 100644 --- a/src/sentry/backup/dependencies.py +++ b/src/sentry/backup/dependencies.py @@ -2,7 +2,7 @@ from collections import defaultdict from enum import Enum, auto, unique -from typing import NamedTuple +from typing import NamedTuple, Type from django.db import models from django.db.models.fields.related import ForeignKey, OneToOneField @@ -35,24 +35,24 @@ class ForeignFieldKind(Enum): class ForeignField(NamedTuple): """A field that creates a dependency on another Sentry model.""" - model: models.base.ModelBase + model: Type[models.base.Model] kind: ForeignFieldKind class ModelRelations(NamedTuple): """What other models does this model depend on, and how?""" - model: models.base.ModelBase + model: Type[models.base.Model] foreign_keys: dict[str, ForeignField] silos: list[SiloMode] - def flatten(self) -> set[models.base.ModelBase]: + def flatten(self) -> set[Type[models.base.Model]]: """Returns a flat list of all related models, omitting the kind of relation they have.""" return {ff.model for ff in self.foreign_keys.values()} -def normalize_model_name(model): +def normalize_model_name(model: Type[models.base.Model]): return f"{model._meta.app_label}.{model._meta.object_name}" @@ -61,8 +61,11 @@ class DependenciesJSONEncoder(json.JSONEncoder): `ModelRelations`.""" def default(self, obj): - if isinstance(obj, models.base.ModelBase): - return normalize_model_name(obj) + if isinstance(obj, models.base.Model): + return normalize_model_name(type(obj)) + if meta := getattr(obj, "_meta", None): + # Note: done to accommodate `node.Nodestore`. + return f"{meta.app_label}.{meta.object_name}" if isinstance(obj, ForeignFieldKind): return obj.name if isinstance(obj, SiloMode): @@ -226,7 +229,7 @@ def sorted_dependencies(): "Can't resolve dependencies for %s in serialized app list." % ", ".join( normalize_model_name(m.model) - for m in sorted(skipped, key=lambda obj: normalize_model_name(obj)) + for m in sorted(skipped, key=lambda mr: normalize_model_name(mr.model)) ) ) model_dependencies_list = skipped diff --git a/src/sentry/backup/imports.py b/src/sentry/backup/imports.py index 7135f4b5e80280..861417deca6f4b 100644 --- a/src/sentry/backup/imports.py +++ b/src/sentry/backup/imports.py @@ -7,7 +7,9 @@ from django.apps import apps from django.core import management, serializers from django.db import IntegrityError, connection, transaction +from django.forms import model_to_dict +from sentry.backup.dependencies import PrimaryKeyMap, dependencies, normalize_model_name from sentry.backup.helpers import EXCLUDED_APPS @@ -27,16 +29,79 @@ class OldImportConfig(NamedTuple): def imports(src, old_config: OldImportConfig, printer=click.echo): - """CLI command wrapping the `exec_import` functionality.""" + """Imports core data for the Sentry installation.""" + + # TODO(hybrid-cloud): actor refactor. Remove this import when done. + from sentry.models.actor import Actor try: # Import / export only works in monolith mode with a consolidated db. with transaction.atomic("default"): + pk_map = PrimaryKeyMap() + deps = dependencies() + for obj in serializers.deserialize( "json", src, stream=True, use_natural_keys=old_config.use_natural_foreign_keys ): if obj.object._meta.app_label not in EXCLUDED_APPS: - obj.save() + # TODO(getsentry/team-ospo#183): This conditional should be removed once we want + # to roll out the new API to self-hosted. + if old_config.use_update_instead_of_create: + obj.save() + else: + o = obj.object + label = o._meta.label_lower + model_name = normalize_model_name(o) + for field, model_relation in deps[model_name].foreign_keys.items(): + field_id = f"{field}_id" + fk = getattr(o, field_id, None) + if fk is not None: + new_pk = pk_map.get(normalize_model_name(model_relation.model), fk) + # TODO(getsentry/team-ospo#167): Will allow missing items when we + # implement org-based filtering. + setattr(o, field_id, new_pk) + + old_pk = o.pk + o.pk = None + o.id = None + + # TODO(hybrid-cloud): actor refactor. Remove this conditional when done. + # + # `Actor` and `Team` have a direct circular dependency between them for the + # time being due to an ongoing refactor (that is, `Actor` foreign keys + # directly into `Team`, and `Team` foreign keys directly into `Actor`). If + # we use `INSERT` database calls naively, they will always fail, because one + # half of the cycle will always be missing. + # + # Because `Actor` ends up first in the dependency sorting (see: + # fixtures/backup/model_dependencies/sorted.json), a viable solution here is + # to always null out the `team_id` field of the `Actor` when we write it, + # and then make sure to circle back and update the relevant actor after we + # create the `Team` models later on (see snippet at the end of this scope). + if label == "sentry.actor": + o.team_id = None + + # TODO(getsentry/team-ospo#181): what's up with email/useremail here? Seems + # like both gets added with `sentry.user` simultaneously? Will need to make + # more robust user handling logic, and to test what happens when a UserEmail + # already exists. + if label == "sentry.useremail": + (o, _) = o.__class__.objects.get_or_create( + user=o.user, email=o.email, defaults=model_to_dict(o) + ) + pk_map.insert(model_name, old_pk, o.pk) + continue + + obj.save(force_insert=True) + pk_map.insert(model_name, old_pk, o.pk) + + # TODO(hybrid-cloud): actor refactor. Remove this conditional when done. + if label == "sentry.team": + if o.actor_id is not None: + actor = Actor.objects.get(pk=o.actor_id) + actor.team_id = o.pk + actor.save() + # For all database integrity errors, let's warn users to follow our # recommended backup/restore workflow before reraising exception. Most of # these errors come from restoring on a different version of Sentry or not restoring diff --git a/src/sentry/backup/validate.py b/src/sentry/backup/validate.py index 0c202a7c9b0edc..0a3a6720524ed8 100644 --- a/src/sentry/backup/validate.py +++ b/src/sentry/backup/validate.py @@ -129,19 +129,24 @@ def json_lines(obj: JSONData) -> list[str]: left_pk_map = PrimaryKeyMap() right_pk_map = PrimaryKeyMap() - # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both - # outputs. + # Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this + # model that we encounter later. for id, right in right_models.items(): if id.ordinal is None: raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set") - # Save the pk -> ordinal mapping on both sides, so that we can decode foreign keys into this - # model that we encounter later. left = left_models[id] - left_pk_map.insert(id.model, left["pk"], id.ordinal) + left_pk_map.insert(id.model, left_models[id]["pk"], id.ordinal) right_pk_map.insert(id.model, right["pk"], id.ordinal) + # We only perform custom comparisons and JSON diffs on non-duplicate entries that exist in both + # outputs. + for id, right in right_models.items(): + if id.ordinal is None: + raise RuntimeError("all InstanceIDs used for comparisons must have their ordinal set") + # Try comparators applicable for this specific model. + left = left_models[id] if id.model in comparators: # We take care to run ALL of the `compare()` methods on each comparator before calling # any `scrub()` methods. This ensures that, in cases where a single model uses multiple diff --git a/src/sentry/testutils/helpers/backups.py b/src/sentry/testutils/helpers/backups.py index fb0973c4e03d79..76b48e9144458a 100644 --- a/src/sentry/testutils/helpers/backups.py +++ b/src/sentry/testutils/helpers/backups.py @@ -3,26 +3,27 @@ import tempfile from pathlib import Path +from django.apps import apps from django.core.management import call_command +from django.db import connections, router, transaction from sentry.backup.comparators import ComparatorMap +from sentry.backup.dependencies import sorted_dependencies from sentry.backup.exports import OldExportConfig, exports from sentry.backup.findings import ComparatorFindings -from sentry.backup.helpers import get_exportable_final_derivations_of, get_final_derivations_of from sentry.backup.imports import OldImportConfig, imports from sentry.backup.validate import validate +from sentry.models.integrations.sentry_app import SentryApp from sentry.silo import unguarded_write from sentry.testutils.factories import get_fixture_path from sentry.utils import json from sentry.utils.json import JSONData __all__ = [ - "ValidationError", "export_to_file", - "get_final_derivations_of", - "get_exportable_final_derivations_of", "import_export_then_validate", "import_export_from_fixture_then_validate", + "ValidationError", ] NOOP_PRINTER = lambda *args, **kwargs: None @@ -46,7 +47,30 @@ def export_to_file(path: Path) -> JSONData: return output -def import_export_then_validate(method_name: str) -> JSONData: +REVERSED_DEPENDENCIES = sorted_dependencies() +REVERSED_DEPENDENCIES.reverse() + + +def clear_database_but_keep_sequences(): + """Deletes all models we care about from the database, in a sequence that ensures we get no + foreign key errors.""" + + with unguarded_write(using="default"), transaction.atomic(using="default"): + for model in REVERSED_DEPENDENCIES: + # For some reason, the tables for `SentryApp*` models don't get deleted properly here + # when using `model.objects.all().delete()`, so we have to call out to Postgres + # manually. + connection = connections[router.db_for_write(SentryApp)] + with connection.cursor() as cursor: + table = model._meta.db_table + cursor.execute(f"DELETE FROM {table:s};") + + # Clear remaining tables that are not explicitly in Sentry's own model dependency graph. + for model in set(apps.get_models()) - set(REVERSED_DEPENDENCIES): + model.objects.all().delete() + + +def import_export_then_validate(method_name: str, *, reset_pks: bool = True) -> JSONData: """Test helper that validates that dat imported from an export of the current state of the test database correctly matches the actual outputted export data.""" @@ -60,10 +84,14 @@ def import_export_then_validate(method_name: str) -> JSONData: # Write the contents of the "expected" JSON file into the now clean database. # TODO(Hybrid-Cloud): Review whether this is the correct route to apply in this case. - with unguarded_write(using="default"), open(tmp_expect) as tmp_file: - # Reset the Django database. - call_command("flush", verbosity=0, interactive=False) - imports(tmp_file, OldImportConfig(), NOOP_PRINTER) + with unguarded_write(using="default"): + if reset_pks: + call_command("flush", verbosity=0, interactive=False) + else: + clear_database_but_keep_sequences() + + with open(tmp_expect) as tmp_file: + imports(tmp_file, OldImportConfig(), NOOP_PRINTER) # Validate that the "expected" and "actual" JSON matches. actual = export_to_file(tmp_actual) diff --git a/tests/sentry/backup/test_models.py b/tests/sentry/backup/test_models.py index dbf41ee753ceec..110aaa2fecf105 100644 --- a/tests/sentry/backup/test_models.py +++ b/tests/sentry/backup/test_models.py @@ -4,10 +4,10 @@ from typing import Literal, Type from uuid import uuid4 -from django.core.management import call_command from django.utils import timezone from sentry_relay.auth import generate_key_pair +from sentry.backup.helpers import get_exportable_final_derivations_of from sentry.db.models import BaseModel from sentry.incidents.models import ( AlertRule, @@ -85,11 +85,10 @@ ScheduleType, ) from sentry.sentry_apps.apps import SentryAppUpdater -from sentry.silo import unguarded_write from sentry.snuba.models import QuerySubscription, SnubaQuery, SnubaQueryEventType from sentry.testutils.cases import TransactionTestCase from sentry.testutils.helpers.backups import ( - get_exportable_final_derivations_of, + clear_database_but_keep_sequences, import_export_then_validate, ) from sentry.utils.json import JSONData @@ -126,13 +125,11 @@ class ModelBackupTests(TransactionTestCase): comparators.""" def setUp(self): - # TODO(Hybrid-Cloud): Review whether this is the correct route to apply in this case. - with unguarded_write(using="default"): - # Reset the Django database. - call_command("flush", verbosity=0, interactive=False) + # Empty the database without resetting primary keys. + clear_database_but_keep_sequences() def import_export_then_validate(self) -> JSONData: - return import_export_then_validate(self._testMethodName) + return import_export_then_validate(self._testMethodName, reset_pks=False) def create_dashboard(self): """Re-usable dashboard object for test cases.""" diff --git a/tests/sentry/backup/test_roundtrip.py b/tests/sentry/backup/test_roundtrip.py index 229e2530cad161..05edb4c21a66c9 100644 --- a/tests/sentry/backup/test_roundtrip.py +++ b/tests/sentry/backup/test_roundtrip.py @@ -4,6 +4,7 @@ from sentry.backup.findings import ComparatorFindingKind, InstanceID from sentry.testutils.helpers.backups import ( ValidationError, + clear_database_but_keep_sequences, import_export_from_fixture_then_validate, ) from sentry.testutils.pytest.fixtures import django_db_all @@ -26,15 +27,19 @@ def test_bad_unequal_json(tmp_path): import_export_from_fixture_then_validate(tmp_path, "fresh-install.json") findings = execinfo.value.info.findings - assert len(findings) == 2 + assert len(findings) == 3 assert findings[0].kind == ComparatorFindingKind.UnequalJSON - assert findings[0].on == InstanceID("sentry.userrole", 1) + assert findings[0].on == InstanceID("sentry.useremail", 1) assert findings[0].left_pk == 1 assert findings[0].right_pk == 1 assert findings[1].kind == ComparatorFindingKind.UnequalJSON - assert findings[1].on == InstanceID("sentry.userroleuser", 1) + assert findings[1].on == InstanceID("sentry.userrole", 1) assert findings[1].left_pk == 1 assert findings[1].right_pk == 1 + assert findings[2].kind == ComparatorFindingKind.UnequalJSON + assert findings[2].on == InstanceID("sentry.userroleuser", 1) + assert findings[2].left_pk == 1 + assert findings[2].right_pk == 1 @run_backup_tests_only_on_single_db @@ -56,3 +61,22 @@ def test_date_updated_with_unzeroed_milliseconds(tmp_path): assert findings[0].right_pk == 1 assert """- "last_updated": "2023-06-22T00:00:00Z",""" in findings[0].reason assert """+ "last_updated": "2023-06-22T00:00:00.000Z",""" in findings[0].reason + + +@django_db_all(transaction=True, reset_sequences=True) +def test_good_continuing_sequences(tmp_path): + # Populate once to set the sequences. + import_export_from_fixture_then_validate(tmp_path, "fresh-install.json", DEFAULT_COMPARATORS) + + # Empty the database without resetting primary keys. + clear_database_but_keep_sequences() + + # Test that foreign keys are properly re-pointed to newly allocated primary keys as they are + # assigned. + import_export_from_fixture_then_validate(tmp_path, "fresh-install.json", DEFAULT_COMPARATORS) + + +# User models are unique and important enough that we target them with a specific test case. +@django_db_all(transaction=True) +def test_user_pk_mapping(tmp_path): + import_export_from_fixture_then_validate(tmp_path, "user-pk-mapping.json", DEFAULT_COMPARATORS)