Skip to content

Commit

Permalink
fix(backup): Fix unique collision behavior
Browse files Browse the repository at this point in the history
There were bugs and inadequacies in the `unique` collision resolution
for imports - this change fixes them. Changes made here:

- ApiToken re-use is disallowed - this is dangerous! Instead, whenever
  public keys or secrets collide, the _importer_ gets a fresh new key.
- There were no collision tests for the following models with
  "collidable" unique sets: RelayUsage, Monitor, and UserPermission.
  These have now been added. A follow-on PR will add a check to
  `test_coverage` to ensure that all such models continue to be tested
  going forward.
- `UserEmail` resolution was incorrect for the `--merge_users` case.
  When merging users, we DO NOT want the email to be overwritten by the
  incoming user!
- OrgAuthToken, ProjectKey, and ProjectOption have all had their
  importers improved to make them a bit less confusing.

Besides this, some other code has been cleaned up a bit.

Issue: getsentry/team-ospo#203
  • Loading branch information
azaslavsky committed Oct 3, 2023
1 parent 1eac634 commit 23dcf21
Show file tree
Hide file tree
Showing 12 changed files with 278 additions and 116 deletions.
27 changes: 27 additions & 0 deletions fixtures/backup/fresh-install.json
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,17 @@
"is_internal": true
}
},
{
"model":"sentry.authenticator",
"pk":1,
"fields": {
"user":1,
"created_at": "2023-06-22T22:59:55.602Z",
"last_used_at": null,
"type": 1,
"config": "\"\""
}
},
{
"model": "sentry.useremail",
"pk": 1,
Expand Down Expand Up @@ -285,5 +296,21 @@
"key": "sentry:origins",
"value": "[\"*\"]"
}
},
{
"model": "sentry.apiapplication",
"pk": 1,
"fields": {
"client_id": "e3bc5ddfe2d6c48115efa3c70bbeb0ef938f37a124a1e3a1275d804d15d111c9","client_secret": "bdcec56c5c1b3763afc292883dc0e7e6a0e7164d20435b9f4799e1df513ff940",
"owner":1,
"name": "uniquely-named-application",
"status": 0,
"allowed_origins": "",
"redirect_uris": "",
"homepage_url": null,
"privacy_url": null,
"terms_url": null,
"date_added": "2023-06-22T21:50:27.787Z"
}
}
]
26 changes: 26 additions & 0 deletions src/sentry/backup/dependencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,32 @@ def get_possible_relocation_scopes(self) -> set[RelocationScope]:
return self.model.get_possible_relocation_scopes()
return set()

def get_uniques_without_foreign_keys(self) -> list[frozenset[str]]:
"""
Gets all unique sets (that is, either standalone fields that are marked `unique=True`, or
groups of fields listed in `Meta.unique_together`) for a model, as long as those sets do not
include any fields that are foreign keys. Note that the `id` field would be trivially
included in this list for every model, and is therefore ignored.
"""

out = []
for u in self.uniques:
# Exclude unique sets that are just {"id"}, since this is true for every model and not
# very useful when searching for potential collisions.
if u == {"id"}:
continue

has_foreign_key = False
for field in u:
if self.foreign_keys.get(field):
has_foreign_key = True
break

if not has_foreign_key:
out.append(u)

return out


def get_model_name(model: type[models.Model] | models.Model) -> NormalizedModelName:
return NormalizedModelName(f"{model._meta.app_label}.{model._meta.object_name}")
Expand Down
54 changes: 28 additions & 26 deletions src/sentry/backup/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from typing import Optional, Tuple

from sentry.backup.dependencies import ImportKind
from sentry.backup.dependencies import ImportKind, dependencies, get_model_name
from sentry.backup.helpers import ImportFlags
from sentry.backup.scopes import ImportScope, RelocationScope

Expand All @@ -17,34 +17,36 @@ def write_relocation_import(
) -> Optional[Tuple[int, ImportKind]]:
# TODO(getsentry/team-ospo#190): Clean up the type checking here.
if self.get_relocation_scope() == RelocationScope.Config: # type: ignore
# Get all fields with `unique=True` for this model.
cls = self.__class__
uniq_fields = [
f.name
for f in cls._meta.get_fields() # type: ignore
if getattr(f, "unique", False) and f.name != "id"
]

# Don't use this mixin for models with multiple `unique=True` fields; write custom logic
# instead.
if len(uniq_fields) > 1:
# Get all unique sets that will potentially cause collisions.
uniq_sets = dependencies()[get_model_name(self)].get_uniques_without_foreign_keys() # type: ignore

# Don't use this mixin for models with multiple unique sets; write custom logic instead.
if len(uniq_sets) > 1:
raise ValueError(
"Cannot use `OverwritableConfigMixin` on model with multiple unique fields"
"Cannot use `OverwritableConfigMixin` on model with multiple unique sets"
)
if len(uniq_fields) == 1 and getattr(self, uniq_fields[0], None) is not None:

if len(uniq_sets) == 1:
uniq_set = uniq_sets[0]
query = dict()
query[uniq_fields[0]] = getattr(self, uniq_fields[0])
existing = self.__class__.objects.filter(**query).first() # type: ignore
if existing:
# Re-use the existing data if config overwrite is disabled.
if not flags.overwrite_configs:
return (existing.pk, ImportKind.Existing)

# We are performing an overwrite (ie, keeping the old pk, but using all of the
# imported values).
self.pk = existing.pk
self.save() # type: ignore
return (self.pk, ImportKind.Overwrite)
for uniq_field_name in uniq_set:
if getattr(self, uniq_field_name, None) is not None:
query[uniq_field_name] = getattr(self, uniq_field_name)

# If all of the fields in the unique set are NULL, we'll avoid a collision, so go
# ahead and write a new entry.
if len(query) > 0:
existing = self.__class__.objects.filter(**query).first() # type: ignore
if existing:
# Re-use the existing data if config overwrite is disabled.
if not flags.overwrite_configs:
return (existing.pk, ImportKind.Existing)

# We are performing an overwrite (ie, keeping the old pk, but using all of
# the imported values).
self.pk = existing.pk
self.save() # type: ignore
return (self.pk, ImportKind.Overwrite)

# Does not have a single colliding unique field - write as usual.
return super().write_relocation_import(scope, flags) # type: ignore
3 changes: 0 additions & 3 deletions src/sentry/models/apitoken.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,9 @@ def write_relocation_import(
query = models.Q(token=self.token) | models.Q(refresh_token=self.refresh_token)
existing = self.__class__.objects.filter(query).first()
if existing:
self.pk = existing.pk
self.expires_at = timezone.now() + DEFAULT_EXPIRATION
self.token = generate_token()
self.refresh_token = generate_token()
self.save()
return (self.pk, ImportKind.Overwrite)

return super().write_relocation_import(scope, flags)

Expand Down
11 changes: 7 additions & 4 deletions src/sentry/models/options/project_option.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,14 @@ class Meta:
def write_relocation_import(
self, _s: ImportScope, _f: ImportFlags
) -> Optional[Tuple[int, ImportKind]]:
(key, created) = self.__class__.objects.get_or_create(
# Some `ProjectOption`s for the project are automatically generated at insertion time via a
# `post_save()` hook, so they should already exist with autogenerated data. We simply need
# to update them with the correct, imported values here.
(option, _) = self.__class__.objects.get_or_create(
project=self.project, key=self.key, defaults={"value": self.value}
)
if key:
self.pk = key.pk
if option:
self.pk = option.pk
self.save()

return (self.pk, ImportKind.Inserted if created else ImportKind.Existing)
return (self.pk, ImportKind.Inserted)
14 changes: 2 additions & 12 deletions src/sentry/models/orgauthtoken.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from django.core.exceptions import ValidationError
from django.db import models
from django.forms import model_to_dict
from django.utils import timezone
from django.utils.encoding import force_str

Expand Down Expand Up @@ -83,7 +82,7 @@ def is_active(self) -> bool:
return self.date_deactivated is None

def write_relocation_import(
self, _s: ImportScope, _f: ImportFlags
self, scope: ImportScope, flags: ImportFlags
) -> Optional[Tuple[int, ImportKind]]:
# TODO(getsentry/team-ospo#190): Prevents a circular import; could probably split up the
# source module in such a way that this is no longer an issue.
Expand Down Expand Up @@ -112,16 +111,7 @@ def write_relocation_import(
self.token_hashed = hash_token(token_str)
self.token_last_characters = token_str[-4:]

(key, created) = OrgAuthToken.objects.get_or_create(
token_hashed=self.token_hashed,
token_last_characters=self.token_last_characters,
defaults=model_to_dict(self),
)
if key:
self.pk = key.pk
self.save()

return (self.pk, ImportKind.Inserted if created else ImportKind.Existing)
return super().write_relocation_import(scope, flags)


def is_org_auth_token_auth(auth: object) -> bool:
Expand Down
7 changes: 5 additions & 2 deletions src/sentry/models/projectkey.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,11 +296,14 @@ def write_relocation_import(
if not self.secret_key or matching_secret_key:
self.secret_key = self.generate_api_key()

(key, created) = ProjectKey.objects.get_or_create(
# ProjectKeys for the project are automatically generated at insertion time via a
# `post_save()` hook, so the keys for the project should already exist. We simply need to
# update them with the correct values here.
(key, _) = ProjectKey.objects.get_or_create(
project=self.project, defaults=model_to_dict(self)
)
if key:
self.pk = key.pk
self.save()

return (self.pk, ImportKind.Inserted if created else ImportKind.Existing)
return (self.pk, ImportKind.Inserted)
2 changes: 1 addition & 1 deletion src/sentry/models/relay.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


@region_silo_only_model
class RelayUsage(Model):
class RelayUsage(OverwritableConfigMixin, Model):
__relocation_scope__ = RelocationScope.Config

relay_id = models.CharField(max_length=64)
Expand Down
22 changes: 15 additions & 7 deletions src/sentry/models/useremail.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,22 @@ def normalize_before_relocation_import(
) -> Optional[int]:
from sentry.models.user import User

# If we are merging users, ignore this import and use the merged user's data.
if pk_map.get_kind(get_model_name(User), self.user_id) == ImportKind.Existing:
return None

old_user_id = self.user_id
old_pk = super().normalize_before_relocation_import(pk_map, scope, flags)
if old_pk is None:
return None

# Only preserve validation hashes in backup/restore scopes - in all others, have the user
# If we are merging users, ignore this import and use the merged user's data.
if pk_map.get_kind(get_model_name(User), old_user_id) == ImportKind.Existing:
# TODO(getsentry/team-ospo#190): Mutating `pk_map` here is a bit hacky, and we probably
# shouldn't do it.
useremail = self.__class__.objects.get(user_id=self.user_id)
pk_map.insert(get_model_name(self), self.pk, useremail.pk, ImportKind.Existing)
return None

# Only preserve validation hashes in the backup/restore scope - in all others, have the user
# verify their email again.
if scope not in {ImportScope.Config, ImportScope.Global}:
if scope != ImportScope.Global:
self.is_verified = False
self.validation_hash = get_secure_token()
self.date_hash_added = timezone.now()
Expand All @@ -116,10 +121,13 @@ def normalize_before_relocation_import(
def write_relocation_import(
self, _s: ImportScope, _f: ImportFlags
) -> Optional[Tuple[int, ImportKind]]:
# The `UserEmail` was automatically generated `post_save()`. We just need to update it with
# the data being imported. Note that if we've reached this point, we cannot be merging into
# an existing user, and are instead modifying the just-created `UserEmail` for a new one.
useremail = self.__class__.objects.get(user=self.user, email=self.email)
for f in self._meta.fields:
if f.name not in ["id", "pk"]:
setattr(useremail, f.name, getattr(self, f.name))
useremail.save()

return (useremail.pk, ImportKind.Existing)
return (useremail.pk, ImportKind.Inserted)
4 changes: 2 additions & 2 deletions src/sentry/models/userip.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ def normalize_before_relocation_import(
self.country_code = None
self.region_code = None

# Only preserve the submitted timing data in backup/restore scopes.
if scope not in {ImportScope.Config, ImportScope.Global}:
# Only preserve the submitted timing data in the backup/restore scope.
if scope != ImportScope.Global:
self.first_seen = self.last_seen = timezone.now()

return old_pk
Expand Down
20 changes: 18 additions & 2 deletions src/sentry/models/userpermission.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from __future__ import annotations

from typing import FrozenSet, List
from typing import FrozenSet, List, Optional, Tuple

from django.db import models

from sentry.backup.scopes import RelocationScope
from sentry.backup.dependencies import ImportKind
from sentry.backup.helpers import ImportFlags
from sentry.backup.scopes import ImportScope, RelocationScope
from sentry.db.models import FlexibleForeignKey, control_silo_only_model, sane_repr
from sentry.db.models.outboxes import ControlOutboxProducingModel
from sentry.models.outbox import ControlOutboxBase, OutboxCategory
Expand Down Expand Up @@ -49,3 +51,17 @@ def outboxes_for_update(self, shard_identifier: int | None = None) -> List[Contr
object_identifier=self.user_id,
)
]

def write_relocation_import(
self, _s: ImportScope, _f: ImportFlags
) -> Optional[Tuple[int, ImportKind]]:
# Ensure that we never attempt to duplicate `UserPermission` entries, even when the
# `merge_users` flag is enabled, as they must always be globally unique per user.
(up, created) = self.__class__.objects.get_or_create(
user_id=self.user_id, permission=self.permission
)
if up:
self.pk = up.pk
self.save()

return (self.pk, ImportKind.Inserted if created else ImportKind.Existing)
Loading

0 comments on commit 23dcf21

Please sign in to comment.