Skip to content

Commit

Permalink
feat(backup): Enable import over RPC (#56689)
Browse files Browse the repository at this point in the history
feat(backup): Enable imports over RPC

This finishes out the work started in #57740 and enables imports over
RPC as well. As with the exports in that PR, imports are already
performed sequentially on a per-model basis, so this change just
consists of moving every such call across an RPC boundary.

Closes getsentry/team-ospo#185
Closes getsentry/team-ospo#196
Closes getsentry/team-ospo#202
  • Loading branch information
azaslavsky authored Oct 18, 2023
1 parent 9bee5dd commit 57686ae
Show file tree
Hide file tree
Showing 20 changed files with 1,280 additions and 732 deletions.
32 changes: 0 additions & 32 deletions fixtures/backup/model_dependencies/detailed.json
Original file line number Diff line number Diff line change
Expand Up @@ -5851,38 +5851,6 @@
"table_name": "sentry_userrole_users",
"uniques": []
},
"sessions.session": {
"dangling": false,
"foreign_keys": {},
"model": "sessions.session",
"relocation_dependencies": [],
"relocation_scope": "Excluded",
"silos": [
"Monolith"
],
"table_name": "django_session",
"uniques": [
[
"session_key"
]
]
},
"sites.site": {
"dangling": false,
"foreign_keys": {},
"model": "sites.site",
"relocation_dependencies": [],
"relocation_scope": "Excluded",
"silos": [
"Monolith"
],
"table_name": "django_site",
"uniques": [
[
"domain"
]
]
},
"social_auth.usersocialauth": {
"dangling": false,
"foreign_keys": {
Expand Down
2 changes: 0 additions & 2 deletions fixtures/backup/model_dependencies/flat.json
Original file line number Diff line number Diff line change
Expand Up @@ -807,8 +807,6 @@
"sentry.user",
"sentry.userrole"
],
"sessions.session": [],
"sites.site": [],
"social_auth.usersocialauth": [
"sentry.user"
]
Expand Down
2 changes: 0 additions & 2 deletions fixtures/backup/model_dependencies/sorted.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@
"sentry.userpermission",
"sentry.userrole",
"sentry.userroleuser",
"sessions.session",
"sites.site",
"social_auth.usersocialauth",
"sentry.savedsearch",
"sentry.release",
Expand Down
2 changes: 0 additions & 2 deletions fixtures/backup/model_dependencies/truncate.json
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@
"sentry_userpermission",
"sentry_userrole",
"sentry_userrole_users",
"django_session",
"django_site",
"social_auth_usersocialauth",
"sentry_savedsearch",
"sentry_release",
Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -604,7 +604,6 @@ module = [
"sentry.services.smtp",
"sentry.shared_integrations.client.base",
"sentry.shared_integrations.client.proxy",
"sentry.silo.base",
"sentry.similarity.backends.dummy",
"sentry.similarity.features",
"sentry.snuba.discover",
Expand Down
5 changes: 5 additions & 0 deletions src/sentry/backup/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,11 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
model_iterator = app_config.get_models()

for model in model_iterator:
# Ignore some native Django models, since other models don't reference them and we don't
# really use them for business logic.
if model._meta.app_label in {"sessions", "sites"}:
continue

foreign_keys: dict[str, ForeignField] = dict()
uniques: set[frozenset[str]] = {
frozenset(combo) for combo in model._meta.unique_together
Expand Down
10 changes: 2 additions & 8 deletions src/sentry/backup/exports.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from __future__ import annotations

import io
from typing import BinaryIO, Type
from typing import BinaryIO

import click
from django.db.models.base import Model

from sentry.backup.dependencies import (
PrimaryKeyMap,
Expand Down Expand Up @@ -93,11 +92,6 @@ def _export(
else:
raise ValueError("Filter arguments must only apply to `Organization` or `User` models")

def get_exporter_for_model(model: Type[Model]):
if SiloMode.CONTROL in model._meta.silo_limit.modes: # type: ignore
return import_export_service.export_by_model
return ImportExportService.get_local_implementation().export_by_model # type: ignore

# TODO(getsentry/team-ospo#190): Another optimization opportunity to use a generator with ijson to print the JSON objects in a streaming manner.
for model in sorted_dependencies():
from sentry.db.models.base import BaseModel
Expand All @@ -116,7 +110,7 @@ def get_exporter_for_model(model: Type[Model]):
continue

dep_models = {get_model_name(d) for d in model_relations.get_dependencies_for_relocation()}
export_by_model = get_exporter_for_model(model)
export_by_model = ImportExportService.get_exporter_for_model(model)
result = export_by_model(
model_name=str(model_name),
scope=RpcExportScope.into_rpc(scope),
Expand Down
143 changes: 61 additions & 82 deletions src/sentry/backup/imports.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,27 +3,46 @@
from typing import BinaryIO, Iterator, Optional, Tuple, Type

import click
from django.conf import settings
from django.core import serializers
from django.core.exceptions import ValidationError as DjangoValidationError
from django.db import IntegrityError, connections, router, transaction
from django.db import transaction
from django.db.models.base import Model
from rest_framework.serializers import ValidationError as DjangoRestFrameworkValidationError

from sentry.backup.dependencies import NormalizedModelName, PrimaryKeyMap, get_model, get_model_name
from sentry.backup.helpers import EXCLUDED_APPS, Filter, ImportFlags, decrypt_encrypted_tarball
from sentry.backup.dependencies import (
NormalizedModelName,
PrimaryKeyMap,
dependencies,
get_model_name,
)
from sentry.backup.helpers import Filter, ImportFlags, decrypt_encrypted_tarball
from sentry.backup.scopes import ImportScope
from sentry.silo import unguarded_write
from sentry.services.hybrid_cloud.import_export.model import (
RpcFilter,
RpcImportError,
RpcImportErrorKind,
RpcImportFlags,
RpcImportScope,
RpcPrimaryKeyMap,
)
from sentry.services.hybrid_cloud.import_export.service import ImportExportService
from sentry.silo.base import SiloMode
from sentry.silo.safety import unguarded_write
from sentry.utils import json
from sentry.utils.env import is_split_db

__all__ = (
"ImportingError",
"import_in_user_scope",
"import_in_organization_scope",
"import_in_config_scope",
"import_in_global_scope",
)


class ImportingError(Exception):
    """Raised when a per-model import call reports a failure.

    The error payload returned by the import call is kept on ``context`` so
    that callers can inspect it after catching the exception.

    Attributes:
        context: The ``RpcImportError`` describing what went wrong.
    """

    def __init__(self, context: RpcImportError) -> None:
        # Forward the payload to `Exception` so that `args` is populated. Without this,
        # `str(exc)` is empty and `copy`/`pickle` cannot rebuild the exception, because they
        # re-invoke the constructor with `self.args`.
        super().__init__(context)
        self.context = context


def _import(
src: BinaryIO,
scope: ImportScope,
Expand All @@ -45,6 +64,11 @@ def _import(
from sentry.models.organizationmember import OrganizationMember
from sentry.models.user import User

if SiloMode.get_current_mode() == SiloMode.CONTROL:
errText = "Imports must be run in REGION or MONOLITH instances only"
printer(errText, err=True)
raise RuntimeError(errText)

flags = flags if flags is not None else ImportFlags()
user_model_name = get_model_name(User)
org_model_name = get_model_name(Organization)
Expand Down Expand Up @@ -154,83 +178,38 @@ def yield_json_models(src) -> Iterator[Tuple[NormalizedModelName, str]]:
# of how we do atomicity: on a per-model (if using multiple dbs) or global (if using a single
# db) basis.
def do_write():
allowed_relocation_scopes = scope.value
pk_map = PrimaryKeyMap()
for (batch_model_name, batch) in yield_json_models(src):
model = get_model(batch_model_name)
if model is None:
raise ValueError("Unknown model name")

using = router.db_for_write(model)
with transaction.atomic(using=using):
count = 0
for obj in serializers.deserialize("json", batch, use_natural_keys=False):
o = obj.object
if o._meta.app_label not in EXCLUDED_APPS or o:
if o.get_possible_relocation_scopes() & allowed_relocation_scopes:
o = obj.object
model_name = get_model_name(o)
for f in filters:
if f.model == type(o) and getattr(o, f.field, None) not in f.values:
break
else:
# We can only be sure `get_relocation_scope()` will be correct if it
# is fired AFTER normalization, as some `get_relocation_scope()`
# methods rely on being able to correctly resolve foreign keys,
# which is only possible after normalization.
old_pk = o.normalize_before_relocation_import(pk_map, scope, flags)
if old_pk is None:
continue

# Now that the model has been normalized, we can ensure that this
# particular instance has a `RelocationScope` that permits
# importing.
if not o.get_relocation_scope() in allowed_relocation_scopes:
continue

written = o.write_relocation_import(scope, flags)
if written is None:
continue

new_pk, import_kind = written
slug = getattr(o, "slug", None)
pk_map.insert(model_name, old_pk, new_pk, import_kind, slug)
count += 1

# If we wrote at least one model, make sure to update the sequences too.
if count > 0:
table = o._meta.db_table
seq = f"{table}_id_seq"
with connections[using].cursor() as cursor:
cursor.execute(f"SELECT setval(%s, (SELECT MAX(id) FROM {table}))", [seq])

try:
if len(settings.DATABASES) == 1:
# TODO(getsentry/team-ospo#185): This is currently untested in single-db mode. Fix ASAP!
with unguarded_write(using="default"), transaction.atomic("default"):
do_write()
else:
for model_name, json_data in yield_json_models(src):
model_relations = dependencies().get(model_name)
if not model_relations:
continue

dep_models = {
get_model_name(d) for d in model_relations.get_dependencies_for_relocation()
}
import_by_model = ImportExportService.get_importer_for_model(model_relations.model)
result = import_by_model(
model_name=str(model_name),
scope=RpcImportScope.into_rpc(scope),
flags=RpcImportFlags.into_rpc(flags),
filter_by=[RpcFilter.into_rpc(f) for f in filters],
pk_map=RpcPrimaryKeyMap.into_rpc(pk_map.partition(dep_models)),
json_data=json_data,
)

if isinstance(result, RpcImportError):
printer(result.pretty(), err=True)
if result.get_kind() == RpcImportErrorKind.IntegrityError:
warningText = ">> Are you restoring from a backup of the same version of Sentry?\n>> Are you restoring onto a clean database?\n>> If so then this IntegrityError might be our fault, you can open an issue here:\n>> https://github.com/getsentry/sentry/issues/new/choose"
printer(warningText, err=True)
raise ImportingError(result)
pk_map.extend(result.mapped_pks)

if SiloMode.get_current_mode() == SiloMode.MONOLITH and not is_split_db():
with unguarded_write(using="default"), transaction.atomic(using="default"):
do_write()

# For all database integrity errors, let's warn users to follow our
# recommended backup/restore workflow before reraising exception. Most of
# these errors come from restoring on a different version of Sentry or not restoring
# on a clean install.
except IntegrityError as e:
warningText = ">> Are you restoring from a backup of the same version of Sentry?\n>> Are you restoring onto a clean database?\n>> If so then this IntegrityError might be our fault, you can open an issue here:\n>> https://github.com/getsentry/sentry/issues/new/choose"
printer(
warningText,
err=True,
)
raise (e)

# Calls to `write_relocation_import` may fail validation and throw either a
# `DjangoValidationError` when a call to `.full_clean()` failed, or a
# `DjangoRestFrameworkValidationError` when a call to a custom DRF serializer failed. This
# exception catcher converts instances of the former to the latter.
except DjangoValidationError as e:
errs = {field: error for field, error in e.message_dict.items()}
raise DjangoRestFrameworkValidationError(errs) from e
else:
do_write()


def import_in_user_scope(
Expand Down
14 changes: 8 additions & 6 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,6 @@
from sentry.backup.comparators import get_default_comparators
from sentry.backup.findings import FindingJSONEncoder
from sentry.backup.helpers import ImportFlags
from sentry.backup.imports import (
import_in_config_scope,
import_in_global_scope,
import_in_organization_scope,
import_in_user_scope,
)
from sentry.backup.validate import validate
from sentry.runner.decorators import configuration
from sentry.utils import json
Expand Down Expand Up @@ -136,6 +130,8 @@ def import_users(src, decrypt_with, filter_usernames, merge_users, silent):
Import the Sentry users from an exported JSON file.
"""

from sentry.backup.imports import import_in_user_scope

import_in_user_scope(
src,
decrypt_with=decrypt_with,
Expand Down Expand Up @@ -173,6 +169,8 @@ def import_organizations(src, decrypt_with, filter_org_slugs, merge_users, silen
Import the Sentry organizations, and all constituent Sentry users, from an exported JSON file.
"""

from sentry.backup.imports import import_in_organization_scope

import_in_organization_scope(
src,
decrypt_with=decrypt_with,
Expand Down Expand Up @@ -208,6 +206,8 @@ def import_config(src, decrypt_with, merge_users, overwrite_configs, silent):
Import all configuration and administrator accounts needed to set up this Sentry instance.
"""

from sentry.backup.imports import import_in_config_scope

import_in_config_scope(
src,
decrypt_with=decrypt_with,
Expand Down Expand Up @@ -236,6 +236,8 @@ def import_global(src, decrypt_with, silent, overwrite_configs):
Import all Sentry data from an exported JSON file.
"""

from sentry.backup.imports import import_in_global_scope

import_in_global_scope(
src,
decrypt_with=decrypt_with,
Expand Down
Loading

0 comments on commit 57686ae

Please sign in to comment.