Skip to content

Commit

Permalink
feat(backup): Support unclaimed users (#55876)
Browse files Browse the repository at this point in the history
This allows us to import users in an "unclaimed" mode: their password
has been reset, and, in the event of a username collision, they have
been given a random suffix to their old username. Such users will then
receive an email when their relocation is complete, informing them that
an account they owned on another Sentry instance has been imported, and
encouraging them to claim this one by setting a new password and
username. By attaching this flag to the imported `User` model instance,
we are able to detect such unclaimed users, and modify the copy/UI shown
to them during this process.

Closes getsentry/team-ospo#181
  • Loading branch information
azaslavsky authored Sep 11, 2023
1 parent 4ac2265 commit bb41a86
Show file tree
Hide file tree
Showing 13 changed files with 535 additions and 12 deletions.
2 changes: 2 additions & 0 deletions fixtures/backup/duplicate-username-and-organization-slug.json
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand All @@ -112,6 +113,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand Down
1 change: 1 addition & 0 deletions fixtures/backup/fresh-install.json
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand Down
1 change: 1 addition & 0 deletions fixtures/backup/user-pk-mapping.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand Down
1 change: 1 addition & 0 deletions fixtures/backup/user-with-maximum-privileges.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand Down
1 change: 1 addition & 0 deletions fixtures/backup/user-with-minimum-privileges.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"is_managed": false,
"is_sentry_app": null,
"is_password_expired": false,
"is_unclaimed": false,
"last_password_change": "2023-06-22T22:59:57.023Z",
"flags": "0",
"session_nonce": null,
Expand Down
2 changes: 1 addition & 1 deletion migrations_lockfile.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ will then be regenerated, and you should be able to merge without conflicts.
feedback: 0001_feedback
nodestore: 0002_nodestore_no_dictfield
replays: 0003_add_size_to_recording_segment
sentry: 0547_add_commitfilechange_language_column
sentry: 0548_add_is_unclaimed_boolean_to_user
social_auth: 0002_default_auto_field
94 changes: 90 additions & 4 deletions src/sentry/backup/comparators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
from abc import ABC, abstractmethod
from collections import defaultdict
from datetime import datetime, timezone
from functools import lru_cache
from typing import Callable, Dict, List, Type

Expand All @@ -16,6 +17,8 @@
from sentry.models.user import User
from sentry.utils.json import JSONData

UNIX_EPOCH = unix_zero_date = datetime.utcfromtimestamp(0).replace(tzinfo=timezone.utc).isoformat()


class ScrubbedData:
"""A singleton class used to indicate data has been scrubbed, without indicating what that data is. A unit type indicating "scrubbing was successful" only."""
Expand Down Expand Up @@ -122,6 +125,10 @@ def __scrub__(
for field in self.fields:
for side in [left, right]:
if side["fields"].get(field) is None:
# Normalize fields that are literally `None` vs those that are totally absent.
if field in side["fields"]:
del side["fields"][field]
side["scrubbed"][f"{self.get_kind().name}::{field}"] = None
continue
value = side["fields"][field]
value = [value] if not isinstance(value, list) else value
Expand Down Expand Up @@ -191,8 +198,8 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
if left["fields"].get(f) is None and right["fields"].get(f) is None:
continue

left_date_updated = left["fields"][f]
right_date_updated = right["fields"][f]
left_date_updated = left["fields"][f] or UNIX_EPOCH
right_date_updated = right["fields"][f] or UNIX_EPOCH
if parser.parse(left_date_updated) > parser.parse(right_date_updated):
findings.append(
ComparatorFinding(
Expand Down Expand Up @@ -364,7 +371,7 @@ def truncate(self, data: list[str]) -> list[str]:


class HashObfuscatingComparator(ObfuscatingComparator):
"""Comparator that compares hashed values like keys and passwords, but then safely truncates
"""Comparator that compares hashed values like keys and tokens, but then safely truncates
them to ensure that they do not leak out in logs, stack traces, etc."""

def truncate(self, data: list[str]) -> list[str]:
Expand All @@ -380,6 +387,80 @@ def truncate(self, data: list[str]) -> list[str]:
return truncated


class UserPasswordObfuscatingComparator(ObfuscatingComparator):
"""
Comparator that safely truncates passwords to ensure that they do not leak out in logs, stack
traces, etc. Additionally, it validates that the left and right "claimed" status is correct.
Namely, we want the following behaviors:
- If the left side is `is_unclaimed = True` but the right side is `is_unclaimed = False`, error.
- If the right side is `is_unclaimed = True`, make sure the password has changed.
- If the right side is `is_unclaimed = False`, make sure that the password stays the same.
"""

def __init__(self):
super().__init__("password")

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
findings = []

# Error case: there is no importing action that can "claim" a user.
if left["fields"].get("is_unclaimed") and not right["fields"].get("is_unclaimed"):
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
left_pk=left["pk"],
right_pk=right["pk"],
reason="""the left value of `is_unclaimed` was `True` but the right value was `False`, even though the act of importing cannot claim users""",
)
)

# Old user, password must remain constant.
if not right["fields"].get("is_unclaimed"):
findings.extend(super().compare(on, left, right))
return findings

# New user, password must change.
left_password = left["fields"]["password"]
right_password = right["fields"]["password"]
if left_password == right_password:
left_pw_truncated = self.truncate(
[left_password] if not isinstance(left_password, list) else left_password
)[0]
right_pw_truncated = self.truncate(
[right_password] if not isinstance(right_password, list) else right_password
)[0]
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
left_pk=left["pk"],
right_pk=right["pk"],
reason=f"""the left value ("{left_pw_truncated}") of `password` was equal to the
right value ("{right_pw_truncated}"), which is disallowed when
`is_unclaimed` is `True`""",
)
)

return findings

def truncate(self, data: list[str]) -> list[str]:
truncated = []
for d in data:
length = len(d)
if length > 80:
# Retains algorithm identifying prefix, plus a few characters on the end.
truncated.append(f"{d[:12]}...{d[-6:]}")
elif length > 40:
# Smaller hashes expose less information
truncated.append(f"{d[:6]}...{d[-4:]}")
else:
# Very small hashes expose no information at all.
truncated.append("...")
return truncated


class IgnoredComparator(JSONScrubbingComparator):
"""Ensures that two fields are tested for mutual existence, and nothing else.
Expand Down Expand Up @@ -583,7 +664,12 @@ def get_default_comparators():
"sentry.servicehook": [HashObfuscatingComparator("secret")],
"sentry.user": [
AutoSuffixComparator("username"),
HashObfuscatingComparator("password"),
DateUpdatedComparator("last_password_change"),
# UserPasswordComparator handles `is_unclaimed` and `password` for us. Because of
# this, we can ignore the `is_unclaimed` field otherwise and scrub it from the
# comparison.
IgnoredComparator("is_unclaimed"),
UserPasswordObfuscatingComparator(),
],
"sentry.useremail": [
DateUpdatedComparator("date_hash_added"),
Expand Down
7 changes: 7 additions & 0 deletions src/sentry/backup/findings.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,13 @@ class ComparatorFindingKind(IntEnum):
# `None`.
UUID4ComparatorExistenceCheck = auto()

# Incorrect user password field.
UserPasswordObfuscatingComparator = auto()

# Failed to compare a user password field because one of the fields being compared was not
# present or `None`.
UserPasswordObfuscatingComparatorExistenceCheck = auto()


class ComparatorFinding(NamedTuple):
"""Store all information about a single failed matching between expected and actual output."""
Expand Down
2 changes: 1 addition & 1 deletion src/sentry/backup/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def json_lines(obj: JSONData) -> list[str]:
cmp.scrub(left, right)

# Finally, perform a diff on the remaining JSON.
diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=3))
diff = list(unified_diff(json_lines(left["fields"]), json_lines(right["fields"]), n=15))
if diff:
findings.append(
ComparatorFinding(
Expand Down
46 changes: 46 additions & 0 deletions src/sentry/migrations/0548_add_is_unclaimed_boolean_to_user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Generated by Django 3.2.20 on 2023-09-07 23:14

from django.db import migrations, models

from sentry.new_migrations.migrations import CheckedMigration


class Migration(CheckedMigration):
# This flag is used to mark that a migration shouldn't be automatically run in production. For
# the most part, this should only be used for operations where it's safe to run the migration
# after your code has deployed. So this should not be used for most operations that alter the
# schema of a table.
# Here are some things that make sense to mark as dangerous:
# - Large data migrations. Typically we want these to be run manually by ops so that they can
# be monitored and not block the deploy for a long period of time while they run.
# - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
# have ops run this and not block the deploy. Note that while adding an index is a schema
# change, it's completely safe to run the operation after the code has deployed.
is_dangerous = False

dependencies = [
("sentry", "0547_add_commitfilechange_language_column"),
]

operations = [
migrations.SeparateDatabaseAndState(
database_operations=[
migrations.RunSQL(
"""
ALTER TABLE "auth_user" ADD COLUMN "is_unclaimed" boolean NOT NULL DEFAULT FALSE;
""",
reverse_sql="""
ALTER TABLE "auth_user" DROP COLUMN "is_unclaimed";
""",
hints={"tables": ["auth_user"]},
),
],
state_operations=[
migrations.AddField(
model_name="user",
name="is_unclaimed",
field=models.BooleanField(default=False),
),
],
)
]
40 changes: 37 additions & 3 deletions src/sentry/models/user.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging
import secrets
import warnings
from string import ascii_letters, digits
from typing import List, Optional, Tuple

from django.contrib.auth.models import AbstractBaseUser
Expand Down Expand Up @@ -42,6 +44,8 @@
audit_logger = logging.getLogger("sentry.audit.user")

MAX_USERNAME_LENGTH = 128
RANDOM_PASSWORD_ALPHABET = ascii_letters + digits
RANDOM_PASSWORD_LENGTH = 32


class UserManager(BaseManager, DjangoUserManager):
Expand Down Expand Up @@ -98,6 +102,16 @@ class User(BaseModel, AbstractBaseUser):
"active. Unselect this instead of deleting accounts."
),
)
is_unclaimed = models.BooleanField(
_("unclaimed"),
default=False,
help_text=_(
"Designates that this user was imported via the relocation tool, but has not yet been "
"claimed by the owner of the associated email. Users in this state have randomized "
"passwords - when email owners claim the account, they are prompted to reset their "
"password and do a one-time update to their username."
),
)
is_superuser = models.BooleanField(
_("superuser status"),
default=False,
Expand Down Expand Up @@ -387,9 +401,22 @@ def _normalize_before_relocation_import(
if old_pk is None:
return None

# New users are always unclaimed.
self.is_unclaimed = True

# Give the user a cryptographically secure random password. The purpose here is to have a
# password that NO ONE knows - the only way to log into this account is to use the "claim
# your account" flow to create a new password (or to click "lost password" and end up there
# anyway), at which point we'll detect the user's `is_unclaimed`` status and prompt them to
# change their `username` as well.
self.set_password(
"".join(secrets.choice(RANDOM_PASSWORD_ALPHABET) for _ in range(RANDOM_PASSWORD_LENGTH))
)

if scope != ImportScope.Global:
self.is_staff = False
self.is_superuser = False
self.is_managed = False

lock = locks.get(f"user:username:{self.id}", duration=5, name="username")
with TimedRetryPolicy(10)(lock.acquire):
Expand All @@ -400,9 +427,6 @@ def _normalize_before_relocation_import(
field_name="username",
)

# TODO(getsentry/team-ospo#181): Create unclaimed users: set the (to be created) unclaimed
# flag, and wipe the password.

return old_pk

def write_relocation_import(
Expand All @@ -413,6 +437,7 @@ def write_relocation_import(
SuperuserUserSerializer,
UserSerializer,
)
from sentry.services.hybrid_cloud.lost_password_hash import lost_password_hash_service

if flags.merge_users:
existing = User.objects.filter(username=self.username).first()
Expand All @@ -433,6 +458,15 @@ def write_relocation_import(
serializer_user.is_valid(raise_exception=True)

self.save(force_insert=True)

# TODO(getsentry/team-ospo#190): the following is an RPC call which could fail for transient
# reasons (network etc). How do we handle that?
lost_password_hash_service.get_or_create(user_id=self.id)

# TODO(getsentry/team-ospo#191): we need to send an email informing the user of their new
# account with a resettable password - we'll need to figure out where in the process that
# actually goes, and how to prevent it from happening during the validation pass.

return (old_pk, self.pk, ImportKind.Inserted)


Expand Down
Loading

0 comments on commit bb41a86

Please sign in to comment.