From 3e3a46c66d7f079bdaf4882995ba34d6e2b1fbf8 Mon Sep 17 00:00:00 2001 From: Alex Zaslavsky Date: Fri, 8 Sep 2023 14:38:04 -0700 Subject: [PATCH] feat(backup): Generate new UUIDs and secrets when needed (#55826) UUIDs should always be unique (it's right in the name!). When we import a `UUIDField`, we should generate a new UUID for it, and update all relevant references. Similarly, we enforce `ProjectKey` API token uniqueness. When importing a colliding token, which will likely happen, we generate new API keys for that project. This may break some user flows during cross-region relocation, so there is a note to revisit this decision later. Issue: getsentry/team-ospo#193 --- src/sentry/backup/comparators.py | 96 ++++++++++- src/sentry/backup/findings.py | 14 ++ src/sentry/incidents/models.py | 29 +++- src/sentry/models/projectkey.py | 13 +- src/sentry/monitors/models.py | 15 +- tests/sentry/backup/test_comparators.py | 205 ++++++++++++++++++++++++ tests/sentry/backup/test_imports.py | 32 ++++ 7 files changed, 400 insertions(+), 4 deletions(-) diff --git a/src/sentry/backup/comparators.py b/src/sentry/backup/comparators.py index 75d11d14f0d270..7868f59377fa58 100644 --- a/src/sentry/backup/comparators.py +++ b/src/sentry/backup/comparators.py @@ -1,5 +1,6 @@ from __future__ import annotations +import re from abc import ABC, abstractmethod from collections import defaultdict from functools import lru_cache @@ -390,6 +391,93 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa return [] +class RegexComparator(JSONScrubbingComparator, ABC): + """Comparator that ensures that both sides match a certain regex.""" + + def __init__(self, regex: re.Pattern, *fields: str): + self.regex = regex + super().__init__(*fields) + + def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]: + findings = [] + fields = sorted(self.fields) + for f in fields: + if left["fields"].get(f) is None and right["fields"].get(f) is None: + continue + + lv = left["fields"][f] + if not self.regex.fullmatch(lv): + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the left value ("{lv}") of `{f}` was not matched by this regex: {self.regex.pattern}""", + ) + ) + + rv = right["fields"][f] + if not self.regex.fullmatch(rv): + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the right value ("{rv}") of `{f}` was not matched by this regex: {self.regex.pattern}""", + ) + ) + return findings + + +class SecretHexComparator(RegexComparator): + """Certain 16-byte hexadecimal API keys are regenerated during an import operation.""" + + def __init__(self, bytes: int, *fields: str): + super().__init__(re.compile(f"""^[0-9a-f]{{{bytes * 2}}}$"""), *fields) + + +# Note: we could also use the `uuid` Python uuid module for this, but it is finicky and accepts some +# weird syntactic variations that are not very common and may cause weird failures when they are +# rejected elsewhere. +class UUID4Comparator(RegexComparator): + """UUIDs must be regenerated on import (otherwise they would not be unique...). This comparator ensures that they retain their validity, but are not equivalent.""" + + def __init__(self, *fields: str): + super().__init__( + re.compile( + "^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}\\Z$", re.I + ), + *fields, + ) + + def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]: + # First, ensure that the two sides are not equivalent. + findings = [] + fields = sorted(self.fields) + for f in fields: + if left["fields"].get(f) is None and right["fields"].get(f) is None: + continue + + lv = left["fields"][f] + rv = right["fields"][f] + if lv == rv: + findings.append( + ComparatorFinding( + kind=self.get_kind(), + on=on, + left_pk=left["pk"], + right_pk=right["pk"], + reason=f"""the left value ({lv}) of the UUID field `{f}` was equal to the right value ({rv})""", + ) + ) + + # Now, make sure both UUIDs are valid. + findings.extend(super().compare(on, left, right)) + return findings + + def auto_assign_datetime_equality_comparators(comps: ComparatorMap) -> None: """Automatically assigns the DateAddedComparator to any `DateTimeField` that is not already claimed by the `DateUpdatedComparator`.""" @@ -469,15 +557,21 @@ def get_default_comparators(): "sentry.apiapplication": [HashObfuscatingComparator("client_id", "client_secret")], "sentry.authidentity": [HashObfuscatingComparator("ident", "token")], "sentry.alertrule": [DateUpdatedComparator("date_modified")], + "sentry.incident": [UUID4Comparator("detection_uuid")], + "sentry.incidentactivity": [UUID4Comparator("notification_uuid")], "sentry.incidenttrigger": [DateUpdatedComparator("date_modified")], "sentry.integration": [DateUpdatedComparator("date_updated")], + "sentry.monitor": [UUID4Comparator("guid")], "sentry.orgauthtoken": [ HashObfuscatingComparator("token_hashed", "token_last_characters") ], "sentry.organization": [AutoSuffixComparator("slug")], "sentry.organizationintegration": [DateUpdatedComparator("date_updated")], "sentry.organizationmember": [HashObfuscatingComparator("token")], - "sentry.projectkey": [HashObfuscatingComparator("public_key", "secret_key")], + "sentry.projectkey": [ + HashObfuscatingComparator("public_key", "secret_key"), + SecretHexComparator(16, "public_key", "secret_key"), + ], "sentry.querysubscription": [DateUpdatedComparator("date_updated")], "sentry.relay": [HashObfuscatingComparator("relay_id", "public_key")], "sentry.relayusage": [HashObfuscatingComparator("relay_id", "public_key")], diff --git a/src/sentry/backup/findings.py b/src/sentry/backup/findings.py index 91ceccebe72a59..61214a905792e7 100644 --- a/src/sentry/backup/findings.py +++ b/src/sentry/backup/findings.py @@ -83,6 +83,20 @@ class ComparatorFindingKind(IntEnum): # `None`. IgnoredComparatorExistenceCheck = auto() + # Secret token fields did not match their regex specification. + SecretHexComparator = auto() + + # Failed to compare a secret token field because one of the fields being compared was not + # present or `None`. + SecretHexComparatorExistenceCheck = auto() + + # UUID4 fields did not match their regex specification. + UUID4Comparator = auto() + + # Failed to compare a UUID4 field because one of the fields being compared was not present or + # `None`. + UUID4ComparatorExistenceCheck = auto() + class ComparatorFinding(NamedTuple): """Store all information about a single failed matching between expected and actual output.""" diff --git a/src/sentry/incidents/models.py b/src/sentry/incidents/models.py index f4949dd5b61009..18455dc66cf799 100644 --- a/src/sentry/incidents/models.py +++ b/src/sentry/incidents/models.py @@ -2,6 +2,8 @@ from collections import namedtuple from enum import Enum +from typing import Optional +from uuid import uuid4 from django.conf import settings from django.core.cache import cache @@ -9,7 +11,8 @@ from django.db.models.signals import post_delete, post_save from django.utils import timezone -from sentry.backup.scopes import RelocationScope +from sentry.backup.dependencies import PrimaryKeyMap +from sentry.backup.scopes import ImportScope, RelocationScope from sentry.db.models import ( ArrayField, FlexibleForeignKey, @@ -212,6 +215,18 @@ def current_end_date(self): def duration(self): return self.current_end_date - self.date_started + def _normalize_before_relocation_import( + self, pk_map: PrimaryKeyMap, scope: ImportScope + ) -> Optional[int]: + old_pk = super()._normalize_before_relocation_import(pk_map, scope) + if old_pk is None: + return None + + # Generate a new UUID, if one exists. + if self.detection_uuid: + self.detection_uuid = uuid4() + return old_pk + @region_silo_only_model class PendingIncidentSnapshot(Model): @@ -280,6 +295,18 @@ class Meta: app_label = "sentry" db_table = "sentry_incidentactivity" + def _normalize_before_relocation_import( + self, pk_map: PrimaryKeyMap, scope: ImportScope + ) -> Optional[int]: + old_pk = super()._normalize_before_relocation_import(pk_map, scope) + if old_pk is None: + return None + + # Generate a new UUID, if one exists. + if self.notification_uuid: + self.notification_uuid = uuid4() + return old_pk + @region_silo_only_model class IncidentSubscription(Model): diff --git a/src/sentry/models/projectkey.py b/src/sentry/models/projectkey.py index e56c53bf5a2060..cf75d519ce1048 100644 --- a/src/sentry/models/projectkey.py +++ b/src/sentry/models/projectkey.py @@ -286,7 +286,18 @@ def write_relocation_import( if old_pk is None: return None - (key, _) = self.__class__.objects.get_or_create( + # If there is a key collision, generate new keys. + # + # TODO(getsentry/team-ospo#190): Is this the right behavior? Another option is to just + # update the SQL schema to no longer require uniqueness. + matching_public_key = self.__class__.objects.filter(public_key=self.public_key).first() + if not self.public_key or matching_public_key: + self.public_key = self.generate_api_key() + matching_secret_key = self.__class__.objects.filter(secret_key=self.secret_key).first() + if not self.secret_key or matching_secret_key: + self.secret_key = self.generate_api_key() + + (key, _) = ProjectKey.objects.get_or_create( project=self.project, defaults=model_to_dict(self) ) if key: diff --git a/src/sentry/monitors/models.py b/src/sentry/monitors/models.py index c27397270690d3..2a67a11307c850 100644 --- a/src/sentry/monitors/models.py +++ b/src/sentry/monitors/models.py @@ -3,6 +3,7 @@ import logging from datetime import datetime, timedelta from typing import TYPE_CHECKING, Any, Dict, Optional +from uuid import uuid4 import jsonschema import pytz @@ -14,7 +15,8 @@ from django.dispatch import receiver from django.utils import timezone -from sentry.backup.scopes import RelocationScope +from sentry.backup.dependencies import PrimaryKeyMap +from sentry.backup.scopes import ImportScope, RelocationScope from sentry.constants import ObjectStatus from sentry.db.models import ( BaseManager, @@ -357,6 +359,17 @@ def get_alert_rule_data(self): return None + def _normalize_before_relocation_import( + self, pk_map: PrimaryKeyMap, scope: ImportScope + ) -> Optional[int]: + old_pk = super()._normalize_before_relocation_import(pk_map, scope) + if old_pk is None: + return None + + # Generate a new UUID. + self.guid = uuid4() + return old_pk + @receiver(pre_save, sender=Monitor) def check_organization_monitor_limits(sender, instance, **kwargs): diff --git a/tests/sentry/backup/test_comparators.py b/tests/sentry/backup/test_comparators.py index 88ed8f4f6ef1d7..7ce6c6d613099e 100644 --- a/tests/sentry/backup/test_comparators.py +++ b/tests/sentry/backup/test_comparators.py @@ -11,6 +11,8 @@ HashObfuscatingComparator, IgnoredComparator, ScrubbedData, + SecretHexComparator, + UUID4Comparator, ) from sentry.backup.dependencies import PrimaryKeyMap, dependencies from sentry.backup.findings import ComparatorFindingKind, InstanceID @@ -914,3 +916,206 @@ def test_good_ignored_comparator_scrubbed(): assert right["scrubbed"] assert right["scrubbed"]["IgnoredComparator::ignored_field"] is ScrubbedData() assert right["scrubbed"].get("IgnoredComparator::other_field") is None + + +def test_good_secret_hex_comparator(): + cmp = SecretHexComparator(8, "equal", "unequal") + id = InstanceID("test", 0) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "equal": "3e04f551c7219550", + "unequal": "3e04f551c7219550", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "equal": "3e04f551c7219550", + "unequal": "50a7e2c7e3ca35fc", + }, + } + assert not cmp.compare(id, left, right) + + +def test_bad_secret_hex_comparator(): + cmp = SecretHexComparator(8, "same", "invalid_left", "invalid_right") + id = InstanceID("test", 0) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "same": "3e04f551c7219550", + "invalid_left": "foo", + "invalid_right": "50a7e2c7e3ca35fc", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "same": "3e04f551c7219550", + "invalid_left": "50a7e2c7e3ca35fc", + "invalid_right": "bar", + }, + } + res = cmp.compare(id, left, right) + assert res + assert len(res) == 2 + + assert res[0] + assert res[0].kind == ComparatorFindingKind.SecretHexComparator + assert res[0].on == id + assert res[0].left_pk == 1 + assert res[0].right_pk == 1 + assert "`invalid_left`" in res[0].reason + assert "left" in res[0].reason + assert "regex" in res[0].reason + assert "foo" in res[0].reason + + assert res[1] + assert res[1].kind == ComparatorFindingKind.SecretHexComparator + assert res[1].on == id + assert res[1].left_pk == 1 + assert res[1].right_pk == 1 + assert "`invalid_right`" in res[1].reason + assert "right" in res[1].reason + assert "regex" in res[1].reason + assert "bar" in res[1].reason + + +def test_good_secret_hex_comparator_scrubbed(): + cmp = SecretHexComparator(8, "secret_hex_field") + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "secret_hex_field": "3e04f551c7219550", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "secret_hex_field": "3e04f551c7219550", + }, + } + cmp.scrub(left, right) + assert left["scrubbed"] + assert left["scrubbed"]["SecretHexComparator::secret_hex_field"] is ScrubbedData() + + assert right["scrubbed"] + assert right["scrubbed"]["SecretHexComparator::secret_hex_field"] is ScrubbedData() + + +def test_good_uuid4_comparator(): + cmp = UUID4Comparator("guid_field") + id = InstanceID("test", 0) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "guid_field": "4c79eea3-8a71-4b99-b291-1f6a906fbb47", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "guid_field": "bb41a040-b413-4b89-aa03-179470d9ee05", + }, + } + assert not cmp.compare(id, left, right) + + +def test_bad_uuid4_comparator(): + cmp = UUID4Comparator("same", "invalid_left", "invalid_right") + id = InstanceID("test", 0) + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "same": "4c79eea3-8a71-4b99-b291-1f6a906fbb47", + "invalid_left": "foo", + "invalid_right": "bb41a040-b413-4b89-aa03-179470d9ee05", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "same": "4c79eea3-8a71-4b99-b291-1f6a906fbb47", + "invalid_left": "bb41a040-b413-4b89-aa03-179470d9ee05", + "invalid_right": "bar", + }, + } + res = cmp.compare(id, left, right) + assert res + assert len(res) == 3 + + assert res[0] + assert res[0].kind == ComparatorFindingKind.UUID4Comparator + assert res[0].on == id + assert res[0].left_pk == 1 + assert res[0].right_pk == 1 + assert "`same`" in res[0].reason + assert "equal" in res[0].reason + assert "4c79eea3-8a71-4b99-b291-1f6a906fbb47" in res[0].reason + + assert res[1] + assert res[1].kind == ComparatorFindingKind.UUID4Comparator + assert res[1].on == id + assert res[1].left_pk == 1 + assert res[1].right_pk == 1 + assert "`invalid_left`" in res[1].reason + assert "left" in res[1].reason + assert "regex" in res[1].reason + assert "foo" in res[1].reason + + assert res[2] + assert res[2].kind == ComparatorFindingKind.UUID4Comparator + assert res[2].on == id + assert res[2].left_pk == 1 + assert res[2].right_pk == 1 + assert "`invalid_right`" in res[2].reason + assert "right" in res[2].reason + assert "regex" in res[2].reason + assert "bar" in res[2].reason + + +def test_good_uuid4_comparator_scrubbed(): + cmp = UUID4Comparator("guid_field") + left: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "guid_field": "4c79eea3-8a71-4b99-b291-1f6a906fbb47", + }, + } + right: JSONData = { + "model": "test", + "ordinal": 1, + "pk": 1, + "fields": { + "guid_field": "4c79eea3-8a71-4b99-b291-1f6a906fbb47", + }, + } + cmp.scrub(left, right) + assert left["scrubbed"] + assert left["scrubbed"]["UUID4Comparator::guid_field"] is ScrubbedData() + + assert right["scrubbed"] + assert right["scrubbed"]["UUID4Comparator::guid_field"] is ScrubbedData() diff --git a/tests/sentry/backup/test_imports.py b/tests/sentry/backup/test_imports.py index 6b448dba18f1e3..f625adc13d1dbe 100644 --- a/tests/sentry/backup/test_imports.py +++ b/tests/sentry/backup/test_imports.py @@ -408,6 +408,38 @@ def test_import_signaling_organization(self): assert ProjectOption.objects.filter(key="sentry:relay-rev-lastchange").exists() assert ProjectOption.objects.filter(key="sentry:option-epoch").exists() + def test_import_colliding_project_key(self): + owner = self.create_exhaustive_user("owner") + invited = self.create_exhaustive_user("invited") + member = self.create_exhaustive_user("member") + self.create_exhaustive_organization("some-org", owner, invited, [member]) + + # Take note of the `ProjectKey` that was created by the exhaustive organization - this is + # the one we'll be importing. + colliding = ProjectKey.objects.filter().first() + colliding_public_key = colliding.public_key + colliding_secret_key = colliding.secret_key + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_path = self.export_to_tmp_file_and_clear_database(tmp_dir) + + # After exporting and clearing the database, insert a copy of the same `ProjectKey` as + # the one found in the import. + project = self.create_project() + ProjectKey.objects.create( + project=project, + label="Test", + public_key=colliding_public_key, + secret_key=colliding_secret_key, + ) + + with open(tmp_path) as tmp_file: + import_in_organization_scope(tmp_file, printer=NOOP_PRINTER) + + assert ProjectKey.objects.count() == 4 + assert ProjectKey.objects.filter(public_key=colliding_public_key).count() == 1 + assert ProjectKey.objects.filter(secret_key=colliding_secret_key).count() == 1 + @run_backup_tests_only_on_single_db class ScopingTests(ImportTestCase):