Skip to content

Commit

Permalink
fix(backup): Fix query logic for dangling model exports
Browse files Browse the repository at this point in the history
There are a small number of models that have no unambiguous direct
connection to their relocation scope's root model - these are called
"dangling" models. The key factor that defines them, and makes them
difficult to handle, is that we cannot use our "query already exported
foreign keys" filtering methodology to select only the models relevant
to our export targets, because these models have no foreign keys that
connect them back to the root of that target. For example,
`TimeSeriesSnapshot` has no foreign keys at all, see:
https://tinyurl.com/27z4x6tk.

In cases like the one above, we ended up exporting ALL of the
`TimeSeriesSnapshot`s in the database - clearly a very bad outcome when
we only want to export those related to a specific org! A better
approach is to define custom filtering logic for these models, thereby
enabling them to use "adjacent" models in the model graph to select only
models that we care about for a given export. In the example above, we
query all `Incident`s filtered down by our previous exports to get a
sneak-peek at the set of `IncidentSnapshot`s (a set that is currently
empty due to going in reverse dependency order), then use that
information to work backwards to grab the `TimeSeriesSnapshot`s we need.

The upshot is that this commit introduces a generic method for
constructing filtered queries for a specific model, the overridable
`query_for_relocation_export`.

Issue: getsentry/team-ospo#203
  • Loading branch information
azaslavsky committed Oct 11, 2023
1 parent e40fb64 commit 806b10c
Show file tree
Hide file tree
Showing 13 changed files with 357 additions and 74 deletions.
229 changes: 229 additions & 0 deletions fixtures/backup/model_dependencies/detailed.json

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions fixtures/backup/model_dependencies/sorted.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"sentry.file",
"sentry.broadcast",
"sentry.deletedorganization",
"sentry.email",
"sentry.organization",
"sentry.organizationmapping",
"sentry.identityprovider",
Expand All @@ -40,12 +39,10 @@
"sentry.controltombstone",
"sentry.projecttransactionthresholdoverride",
"sentry.projecttransactionthreshold",
"sentry.useremail",
"sentry.userip",
"sentry.userpermission",
"sentry.userrole",
"sentry.userroleuser",
"sentry.timeseriessnapshot",
"sentry.discoversavedquery",
"sentry.monitor",
"sentry.monitorlocation",
Expand Down Expand Up @@ -80,6 +77,7 @@
"sentry.eventattachment",
"sentry.environment",
"sentry.environmentproject",
"sentry.email",
"sentry.customdynamicsamplingrule",
"sentry.customdynamicsamplingruleproject",
"sentry.distribution",
Expand Down Expand Up @@ -188,6 +186,7 @@
"sentry.rulefirehistory",
"sentry.servicehook",
"sentry.teamreplica",
"sentry.useremail",
"sentry.userreport",
"sentry.notificationaction",
"sentry.alertrule",
Expand All @@ -213,9 +212,10 @@
"sentry.dashboardwidgetquery",
"sentry.sentryappavatar",
"sentry.pendingincidentsnapshot",
"sentry.incidentsnapshot",
"sentry.timeseriessnapshot",
"sentry.incidentactivity",
"sentry.incidentsubscription",
"sentry.incidenttrigger",
"sentry.monitorincident"
"sentry.monitorincident",
"sentry.incidentsnapshot"
]
10 changes: 5 additions & 5 deletions fixtures/backup/model_dependencies/truncate.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
"sentry_file",
"sentry_broadcast",
"sentry_deletedorganization",
"sentry_email",
"sentry_organization",
"sentry_organizationmapping",
"sentry_identityprovider",
Expand All @@ -40,12 +39,10 @@
"sentry_controltombstone",
"sentry_projecttransactionthresholdoverride",
"sentry_projecttransactionthreshold",
"sentry_useremail",
"sentry_userip",
"sentry_userpermission",
"sentry_userrole",
"sentry_userrole_users",
"sentry_timeseriessnapshot",
"sentry_discoversavedquery",
"sentry_monitor",
"sentry_monitorlocation",
Expand Down Expand Up @@ -80,6 +77,7 @@
"sentry_eventattachment",
"sentry_environment",
"sentry_environmentproject",
"sentry_email",
"sentry_customdynamicsamplingrule",
"sentry_customdynamicsamplingruleproject",
"sentry_distribution",
Expand Down Expand Up @@ -188,6 +186,7 @@
"sentry_rulefirehistory",
"sentry_servicehook",
"sentry_teamreplica",
"sentry_useremail",
"sentry_userreport",
"sentry_notificationaction",
"sentry_alertrule",
Expand All @@ -213,9 +212,10 @@
"sentry_dashboardwidgetquery",
"sentry_sentryappavatar",
"sentry_pendingincidentsnapshot",
"sentry_incidentsnapshot",
"sentry_timeseriessnapshot",
"sentry_incidentactivity",
"sentry_incidentsubscription",
"sentry_incidenttrigger",
"sentry_monitorincident"
"sentry_monitorincident",
"sentry_incidentsnapshot"
]
14 changes: 10 additions & 4 deletions src/sentry/backup/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ class ModelRelations:
dangling: Optional[bool]
foreign_keys: dict[str, ForeignField]
model: Type[models.base.Model]
relocation_dependencies: set[Type[models.base.Model]]
relocation_scope: RelocationScope | set[RelocationScope]
silos: list[SiloMode]
table_name: str
Expand Down Expand Up @@ -421,6 +422,8 @@ def dependencies() -> dict[NormalizedModelName, ModelRelations]:
dangling=None,
foreign_keys=foreign_keys,
model=model,
# We'll fill this in after the entire dictionary is populated.
relocation_dependencies=set(),
relocation_scope=getattr(model, "__relocation_scope__", RelocationScope.Excluded),
silos=list(
getattr(model._meta, "silo_limit", ModelSiloLimit(SiloMode.MONOLITH)).modes
Expand Down Expand Up @@ -491,9 +494,12 @@ def resolve_dangling(seen: Set[NormalizedModelName], model_name: NormalizedModel
seen.remove(model_name)
return model_relations.dangling

for model_name in model_dependencies_dict.keys():
if model_name not in relocation_root_models:
resolve_dangling(set(), model_name)
for model_name, model_relations in model_dependencies_dict.items():
resolve_dangling(set(), model_name)
model_relations.relocation_dependencies = {
model_dependencies_dict[NormalizedModelName(rd)].model
for rd in getattr(model_relations.model, "__relocation_dependencies__", set())
}

return model_dependencies_dict

Expand Down Expand Up @@ -524,7 +530,7 @@ def sorted_dependencies() -> list[Type[models.base.Model]]:
changed = False
while model_dependencies_dict:
model_deps = model_dependencies_dict.pop()
deps = model_deps.flatten()
deps = model_deps.flatten().union(model_deps.relocation_dependencies)
model = model_deps.model

# If all of the models in the dependency list are either already
Expand Down
64 changes: 16 additions & 48 deletions src/sentry/backup/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,9 @@ def _export(
"""

# Import here to prevent circular module resolutions.
from sentry.models.email import Email
from sentry.models.organization import Organization
from sentry.models.organizationmember import OrganizationMember
from sentry.models.user import User
from sentry.models.useremail import UserEmail

allowed_relocation_scopes = scope.value
pk_map = PrimaryKeyMap()
Expand All @@ -67,32 +65,27 @@ def _export(
filters = []
if filter_by is not None:
filters.append(filter_by)
user_pks = []

if filter_by.model == Organization:
org_pks = [o.pk for o in Organization.objects.filter(slug__in=filter_by.values)]
user_pks = [
o.user_id
for o in OrganizationMember.objects.filter(organization_id__in=set(org_pks))
]
filters.append(Filter(User, "pk", set(user_pks)))
elif filter_by.model == User:
if filter_by.field == "pk":
user_pks = [u.pk for u in User.objects.filter(id__in=filter_by.values)]
elif filter_by.field == "username":
user_pks = [u.pk for u in User.objects.filter(username__in=filter_by.values)]
else:
if filter_by.field not in {"pk", "id", "slug"}:
raise ValueError(
"Filter arguments must only apply to `User`'s `pk`' or `username` fields"
"Filter arguments must only apply to `Organization`'s `slug` field"
)

org_pks = set(
Organization.objects.filter(slug__in=filter_by.values).values_list("id", flat=True)
)
user_pks = set(
OrganizationMember.objects.filter(organization_id__in=org_pks).values_list(
"user_id", flat=True
)
)
filters.append(Filter(User, "pk", set(user_pks)))
elif filter_by.model == User:
if filter_by.field not in {"pk", "id", "username"}:
raise ValueError("Filter arguments must only apply to `User`'s `username` field")
else:
raise ValueError("Filter arguments must only apply to `Organization` or `User` models")

# `Sentry.Email` models don't have any explicit dependencies on `Sentry.User`, so we need to
# find them manually via `UserEmail`.
emails = [ue.email for ue in UserEmail.objects.filter(user__in=user_pks)]
filters.append(Filter(Email, "email", set(emails)))

def filter_objects(queryset_iterator):
# Intercept each value from the queryset iterator, ensure that it has the correct relocation
# scope and that all of its dependencies have already been exported. If they have, store it
Expand Down Expand Up @@ -128,17 +121,12 @@ def filter_objects(queryset_iterator):

def yield_objects():
from sentry.db.models.base import BaseModel
from sentry.models.team import Team

deps = dependencies()

# Collate the objects to be serialized.
for model in sorted_dependencies():
if not issubclass(model, BaseModel):
continue

model_name = get_model_name(model)
model_relations = deps[model_name]
possible_relocation_scopes = model.get_possible_relocation_scopes()
includable = possible_relocation_scopes & allowed_relocation_scopes
if not includable or model._meta.proxy:
Expand All @@ -155,27 +143,7 @@ def yield_objects():
if f.model == model:
query[f.field + "__in"] = f.values
q &= Q(**query)

# TODO: actor refactor. Remove this conditional. For now, we do no filtering on
# teams.
if model_name != get_model_name(Team):
# Create a filter for each possible FK reference to constrain the amount of data
# being sent over from the database. We only want models where every FK field
# references into a model whose PK we've already exported (or `NULL`, if the FK
# field is nullable).
for field_name, foreign_field in model_relations.foreign_keys.items():
foreign_field_model_name = get_model_name(foreign_field.model)
matched_fks = set(pk_map.get_pks(foreign_field_model_name))
matched_fks_query = dict()
if len(matched_fks) > 0:
matched_fks_query[field_name + "__in"] = matched_fks

if foreign_field.nullable:
match_on_null_query = dict()
match_on_null_query[field_name + "__isnull"] = True
q &= Q(**matched_fks_query) | Q(**match_on_null_query)
else:
q &= Q(**matched_fks_query)
q = model.query_for_relocation_export(q, pk_map)

pk_name = model._meta.pk.name # type: ignore
queryset = model._base_manager.filter(q).order_by(pk_name)
Expand Down
27 changes: 27 additions & 0 deletions src/sentry/db/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class Meta:
abstract = True

__relocation_scope__: RelocationScope | set[RelocationScope]
__relocation_dependencies__: set[str]

objects: BaseManager[Self] = BaseManager()

Expand Down Expand Up @@ -134,6 +135,32 @@ def get_possible_relocation_scopes(cls) -> set[RelocationScope]:
else {cls.__relocation_scope__}
)

@classmethod
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
""" """

model_name = get_model_name(cls)
model_relations = dependencies()[model_name]

# Create a filter for each possible FK reference to constrain the amount of data being sent
# over from the database. We only want models where every FK field references into a model
# whose PK we've already exported (or `NULL`, if the FK field is nullable).
for field_name, foreign_field in model_relations.foreign_keys.items():
foreign_field_model_name = get_model_name(foreign_field.model)
matched_fks = set(pk_map.get_pks(foreign_field_model_name))
matched_fks_query = dict()
if len(matched_fks) > 0:
matched_fks_query[field_name + "__in"] = matched_fks

if foreign_field.nullable:
match_on_null_query = dict()
match_on_null_query[field_name + "__isnull"] = True
q &= models.Q(**matched_fks_query) | models.Q(**match_on_null_query)
else:
q &= models.Q(**matched_fks_query)

return q

def normalize_before_relocation_import(
self, pk_map: PrimaryKeyMap, _s: ImportScope, _f: ImportFlags
) -> int | None:
Expand Down
11 changes: 10 additions & 1 deletion src/sentry/incidents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from django.db.models.signals import post_delete, post_save
from django.utils import timezone

from sentry.backup.dependencies import PrimaryKeyMap
from sentry.backup.dependencies import PrimaryKeyMap, get_model_name
from sentry.backup.helpers import ImportFlags
from sentry.backup.scopes import ImportScope, RelocationScope
from sentry.db.models import (
Expand Down Expand Up @@ -261,6 +261,7 @@ class Meta:
@region_silo_only_model
class TimeSeriesSnapshot(Model):
__relocation_scope__ = RelocationScope.Organization
__relocation_dependencies__ = {"sentry.Incident"}

start = models.DateTimeField()
end = models.DateTimeField()
Expand All @@ -272,6 +273,14 @@ class Meta:
app_label = "sentry"
db_table = "sentry_timeseriessnapshot"

@classmethod
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
pks = IncidentSnapshot.objects.filter(
incident__in=pk_map.get_pks(get_model_name(Incident))
).values_list("event_stats_snapshot_id", flat=True)

return q & models.Q(pk__in=pks)


class IncidentActivityType(Enum):
CREATED = 1
Expand Down
9 changes: 8 additions & 1 deletion src/sentry/models/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from django.forms import model_to_dict
from rest_framework import serializers

from sentry.backup.dependencies import ImportKind
from sentry.backup.dependencies import ImportKind, PrimaryKeyMap
from sentry.backup.helpers import ImportFlags
from sentry.backup.scopes import ImportScope, RelocationScope
from sentry.db.models import Model, region_silo_only_model
Expand Down Expand Up @@ -148,6 +148,13 @@ def get_actor_identifier(self):
# essentially forwards request to ActorTuple.get_actor_identifier
return self.get_actor_tuple().get_actor_identifier()

@classmethod
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
# Actors that can have both their `user` and `team` value set to null. Exclude such actors # from the export.
q = super().query_for_relocation_export(q, pk_map)

return q & ~models.Q(team__isnull=True, user_id__isnull=True)

# TODO(hybrid-cloud): actor refactor. Remove this method when done.
def write_relocation_import(
self, scope: ImportScope, flags: ImportFlags
Expand Down
16 changes: 15 additions & 1 deletion src/sentry/models/email.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from django.utils import timezone
from django.utils.translation import gettext_lazy as _

from sentry.backup.dependencies import ImportKind
from sentry.backup.dependencies import ImportKind, PrimaryKeyMap, get_model_name
from sentry.backup.helpers import ImportFlags
from sentry.backup.scopes import ImportScope, RelocationScope
from sentry.db.models import CIEmailField, Model, control_silo_only_model, sane_repr
Expand All @@ -21,6 +21,7 @@ class Email(Model):
"""

__relocation_scope__ = RelocationScope.User
__relocation_dependencies__ = {"sentry.User"}

email = CIEmailField(_("email address"), unique=True, max_length=75)
date_added = models.DateTimeField(default=timezone.now)
Expand All @@ -31,6 +32,19 @@ class Meta:

__repr__ = sane_repr("email")

@classmethod
def query_for_relocation_export(cls, q: models.Q, pk_map: PrimaryKeyMap) -> models.Q:
from sentry.models.user import User
from sentry.models.useremail import UserEmail

# `Sentry.Email` models don't have any explicit dependencies on `Sentry.User`, so we need to
# find them manually via `UserEmail`.
emails = UserEmail.objects.filter(
user_id__in=pk_map.get_pks(get_model_name(User))
).values_list("email", flat=True)

return q & models.Q(email__in=emails)

def write_relocation_import(
self, _s: ImportScope, _f: ImportFlags
) -> Optional[Tuple[int, ImportKind]]:
Expand Down
6 changes: 6 additions & 0 deletions src/sentry/models/team.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,12 @@ def get_member_actor_ids(self):

return owner_ids

# TODO(hybrid-cloud): actor refactor. Remove this method when done. For now, we do no filtering
# on teams.
@classmethod
def query_for_relocation_export(cls, q: models.Q, _: PrimaryKeyMap) -> models.Q:
return q

# TODO(hybrid-cloud): actor refactor. Remove this method when done.
def normalize_before_relocation_import(
self, pk_map: PrimaryKeyMap, scope: ImportScope, flags: ImportFlags
Expand Down
Loading

0 comments on commit 806b10c

Please sign in to comment.