Skip to content

Commit

Permalink
feat(backup): Add EmailObfuscatingComparator (#53763)
Browse files Browse the repository at this point in the history
This comparator is added to prevent sensitive data like user emails from
leaking into logs, stack traces, and so on.

Issue: getsentry/team-ospo#157
  • Loading branch information
azaslavsky authored Jul 31, 2023
1 parent 4ebc61d commit ed99e6e
Show file tree
Hide file tree
Showing 2 changed files with 208 additions and 20 deletions.
107 changes: 96 additions & 11 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from datetime import datetime, timedelta, timezone
from difflib import unified_diff
from io import StringIO
from typing import Dict, List, NamedTuple
from typing import Callable, Dict, List, Literal, NamedTuple

import click
from dateutil import parser
Expand Down Expand Up @@ -79,7 +79,7 @@ class JSONScrubbingComparator(ABC):
the `scrub(...)` methods are. This ensures that comparators that touch the same fields do not
have their inputs mangled by one another."""

def __init__(self, fields: list[str]):
def __init__(self, *fields: str):
self.fields = fields

def check(self, side: str, data: JSONData) -> None:
Expand Down Expand Up @@ -125,15 +125,22 @@ def existence(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Com
)
return findings

def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
def __scrub__(
self,
left: JSONData,
right: JSONData,
f: Callable[[list[str]], list[str]] | Callable[[list[str]], Literal[True]] = lambda _: True,
) -> None:
"""Removes all of the fields compared by this comparator from the `fields` dict, so that the
remaining fields may be compared for equality.
remaining fields may be compared for equality. Public callers should use the inheritance-safe wrapper, `scrub`, rather than using this internal method directly.
Parameters:
- on: An `InstanceID` that must be shared by both versions of the JSON model being compared.
- left: One of the models being compared (usually the "before") version.
- right: The other model it is being compared against (usually the "after" or
post-processed version).
- f: Optional helper method that populates the RHS of the scrubbed entry. If this is
omitted, the scrubbed entry defaults to `True`.
"""

self.check("left", left)
Expand All @@ -142,11 +149,24 @@ def scrub(self, on: InstanceID, left: JSONData, right: JSONData) -> None:
left["scrubbed"] = {}
if "scrubbed" not in right:
right["scrubbed"] = {}

for field in self.fields:
del left["fields"][field]
left["scrubbed"][f"{self.get_kind()}::{field}"] = True
del right["fields"][field]
right["scrubbed"][f"{self.get_kind()}::{field}"] = True
for side in [left, right]:
if field not in side["fields"]:
continue
value = side["fields"][field]
if not value:
continue
value = [value] if isinstance(value, str) else value
del side["fields"][field]
side["scrubbed"][f"{self.get_kind()}::{field}"] = f(value)

def scrub(
self,
left: JSONData,
right: JSONData,
) -> None:
self.__scrub__(left, right)

def get_kind(self) -> str:
"""A unique identifier for this particular derivation of JSONScrubbingComparator, which will
Expand All @@ -160,7 +180,7 @@ class DateUpdatedComparator(JSONScrubbingComparator):
date that is greater than (ie, occurs after) the specified field's left input."""

def __init__(self, field: str):
super().__init__([field])
super().__init__(field)
self.field = field

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
Expand All @@ -182,14 +202,79 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[Compa
return []


class EmailObfuscatingComparator(JSONScrubbingComparator):
"""Comparator that compares emails, but then safely truncates them to ensure that they
do not leak out in logs, stack traces, etc."""

def __init__(self, *fields: str):
super().__init__(*fields)

def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> list[ComparatorFinding]:
findings = []
for f in self.fields:
if f not in left["fields"] and f not in right["fields"]:
continue

lv = left["fields"][f]
rv = right["fields"][f]
if lv != rv:
lv = self.truncate([lv] if isinstance(lv, str) else lv)[0]
rv = self.truncate([rv] if isinstance(rv, str) else rv)[0]
findings.append(
ComparatorFinding(
kind=self.get_kind(),
on=on,
reason=f"""the left `{f}` value ("{lv}") on `{on}` was not equal to the
right value ("{rv}")""",
)
)
return findings

def scrub(
self,
left: JSONData,
right: JSONData,
) -> None:
super().__scrub__(left, right, self.truncate)

@staticmethod
def truncate(data: list[str]) -> list[str]:
truncated = []
for d in data:
parts = d.split("@")
if len(parts) == 2:
username = parts[0]
domain = parts[1]
truncated.append(f"{username[0]}...@...{domain[-6:]}")
else:
truncated.append(d)
return truncated


ComparatorList = List[JSONScrubbingComparator]
ComparatorMap = Dict[str, ComparatorList]
DEFAULT_COMPARATORS: ComparatorMap = {
"sentry.apitoken": [EmailObfuscatingComparator("user")],
"sentry.apiapplication": [EmailObfuscatingComparator("owner")],
"sentry.apiauthorization": [EmailObfuscatingComparator("user")],
"sentry.authidentity": [EmailObfuscatingComparator("user")],
"sentry.authenticator": [EmailObfuscatingComparator("user")],
"sentry.email": [EmailObfuscatingComparator("email")],
"sentry.alertrule": [DateUpdatedComparator("date_modified")],
"sentry.incidenttrigger": [DateUpdatedComparator("date_modified")],
"sentry.organizationmember": [EmailObfuscatingComparator("user_email")],
"sentry.querysubscription": [DateUpdatedComparator("date_updated")],
"sentry.sentryapp": [EmailObfuscatingComparator("creator_user", "creator_label", "proxy_user")],
"sentry.user": [EmailObfuscatingComparator("email", "username")],
"sentry.useremail": [EmailObfuscatingComparator("email", "user")],
"sentry.userip": [EmailObfuscatingComparator("user")],
"sentry.useroption": [EmailObfuscatingComparator("user")],
"sentry.userpermission": [EmailObfuscatingComparator("user")],
"sentry.userrole": [DateUpdatedComparator("date_updated")],
"sentry.userroleuser": [DateUpdatedComparator("date_updated")],
"sentry.userroleuser": [
DateUpdatedComparator("date_updated"),
EmailObfuscatingComparator("user"),
],
}


Expand Down Expand Up @@ -260,7 +345,7 @@ def json_lines(obj: JSONData) -> list[str]:
if res:
findings.extend(res)
for cmp in comparators[id.model]:
cmp.scrub(id, exp, act)
cmp.scrub(exp, act)

# Finally, perform a diff on the remaining JSON.
diff = list(unified_diff(json_lines(exp["fields"]), json_lines(act["fields"]), n=3))
Expand Down
121 changes: 112 additions & 9 deletions tests/sentry/backup/test_comparators.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from sentry.runner.commands.backup import DateUpdatedComparator, InstanceID
from sentry.runner.commands.backup import (
DateUpdatedComparator,
EmailObfuscatingComparator,
InstanceID,
)
from sentry.utils.json import JSONData


def test_good_comparator_both_sides_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
present = {
present: JSONData = {
"model": "test",
"pk": 1,
"fields": {
Expand All @@ -17,7 +22,7 @@ def test_good_comparator_both_sides_existing():
def test_good_comparator_neither_side_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
missing = {
missing: JSONData = {
"model": "test",
"pk": 1,
"fields": {},
Expand All @@ -28,14 +33,14 @@ def test_good_comparator_neither_side_existing():
def test_bad_comparator_only_one_side_existing():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
present = {
present: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:12:34.567Z",
},
}
missing = {
missing: JSONData = {
"model": "test",
"pk": 1,
"fields": {},
Expand All @@ -60,14 +65,14 @@ def test_bad_comparator_only_one_side_existing():
def test_good_date_updated_comparator():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
left = {
left: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:00:00.123Z",
},
}
right = {
right: JSONData = {
"model": "test",
"pk": 1,
"fields": {
Expand All @@ -80,14 +85,14 @@ def test_good_date_updated_comparator():
def test_bad_date_updated_comparator():
cmp = DateUpdatedComparator("my_date_field")
id = InstanceID("test", 1)
left = {
left: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"my_date_field": "2023-06-22T23:12:34.567Z",
},
}
right = {
right: JSONData = {
"model": "test",
"pk": 1,
"fields": {
Expand All @@ -99,3 +104,101 @@ def test_bad_date_updated_comparator():
assert res[0]
assert res[0].on == id
assert res[0].kind == "DateUpdatedComparator"


def test_good_email_obfuscating_comparator():
cmp = EmailObfuscatingComparator("one_email", "many_emails")
id = InstanceID("test", 1)
model = {
"model": "test",
"pk": 1,
"fields": {
"one_email": "[email protected]",
"many_emails": [
"[email protected]",
"[email protected]",
],
},
}
assert not cmp.compare(id, model, model)


def test_bad_email_obfuscating_comparator():
cmp = EmailObfuscatingComparator("one_email", "many_emails")
id = InstanceID("test", 1)
left: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"one_email": "[email protected]",
"many_emails": [
"[email protected]",
"[email protected]",
],
},
}
right: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"one_email": "[email protected]",
"many_emails": [
"[email protected]",
"[email protected]",
],
},
}
res = cmp.compare(id, left, right)
assert res

assert res[0]
assert res[0].on == id
assert res[0].kind == "EmailObfuscatingComparator"
assert "[email protected]" in res[0].reason
assert "[email protected]" in res[0].reason

assert res[1]
assert res[1].on == id
assert res[1].kind == "EmailObfuscatingComparator"
assert "[email protected]" in res[1].reason
assert "[email protected]" in res[1].reason


def test_goo_email_obfuscating_comparator_scrubbed():
cmp = EmailObfuscatingComparator("one_email", "many_emails")
left: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"one_email": "[email protected]",
"many_emails": [
"[email protected]",
"[email protected]",
],
},
}
right: JSONData = {
"model": "test",
"pk": 1,
"fields": {
"one_email": "[email protected]",
"many_emails": [
"[email protected]",
"[email protected]",
],
},
}
cmp.scrub(left, right)
assert left["scrubbed"]
assert left["scrubbed"]["EmailObfuscatingComparator::one_email"] == ["[email protected]"]
assert left["scrubbed"]["EmailObfuscatingComparator::many_emails"] == [
"[email protected]",
"[email protected]",
]

assert right["scrubbed"]
assert right["scrubbed"]["EmailObfuscatingComparator::one_email"] == ["[email protected]"]
assert right["scrubbed"]["EmailObfuscatingComparator::many_emails"] == [
"[email protected]",
"[email protected]",
]

0 comments on commit ed99e6e

Please sign in to comment.