Skip to content

Commit

Permalink
feat(backup): Support export filtering
Browse files Browse the repository at this point in the history
This feature allows users to control which subset of their database they
export, filtered by either user (when exporting in user scope) or
organization (when exporting in organization scope). The available filters
mirror those offered for import scopes.

Closes getsentry/team-ospo#167
  • Loading branch information
azaslavsky committed Aug 31, 2023
1 parent 4711131 commit 7943bf8
Show file tree
Hide file tree
Showing 4 changed files with 308 additions and 25 deletions.
112 changes: 103 additions & 9 deletions src/sentry/backup/exports.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,13 @@
from django.core.serializers import serialize
from django.core.serializers.json import DjangoJSONEncoder

from sentry.backup.dependencies import sorted_dependencies
from sentry.backup.dependencies import (
PrimaryKeyMap,
dependencies,
normalize_model_name,
sorted_dependencies,
)
from sentry.backup.helpers import Filter
from sentry.backup.scopes import ExportScope

UTC_0 = timezone(timedelta(hours=0))
Expand Down Expand Up @@ -59,14 +65,82 @@ def __init__(
self.use_natural_foreign_keys = use_natural_foreign_keys


def _export(dest, scope: ExportScope, old_config: OldExportConfig, indent: int, printer=click.echo):
def _export(
dest,
scope: ExportScope,
old_config: OldExportConfig,
*,
indent: int = 2,
filter_by: Filter | None = None,
printer=click.echo,
):
"""
Exports core data for the Sentry installation.
It is generally preferable to avoid calling this function directly, as there are certain combinations of input parameters that should not be used together. Instead, use one of the other wrapper functions in this file, named `export_in_XXX_scope()`.
"""

# Import here to prevent circular module resolutions.
from sentry.models.email import Email
from sentry.models.organization import Organization
from sentry.models.organizationmember import OrganizationMember
from sentry.models.user import User
from sentry.models.useremail import UserEmail

allowed_relocation_scopes = scope.value
pk_map = PrimaryKeyMap()
deps = dependencies()

filters = []
if filter_by is not None:
filters.append(filter_by)

if filter_by.model == Organization:
org_pks = [o.pk for o in Organization.objects.filter(slug__in=filter_by.values)]
user_pks = [
o.user_id
for o in OrganizationMember.objects.filter(organization_id__in=set(org_pks))
]
filters.append(Filter(User, "pk", set(user_pks)))
elif filter_by.model == User:
user_pks = [u.pk for u in User.objects.filter(username__in=filter_by.values)]
else:
raise TypeError("Filter arguments must only apply to `Organization` or `User` models")

# `sentry.Email` models don't have any explicit dependencies on `User`, so we need to find
# them manually via `UserEmail`.
emails = [ue.email for ue in UserEmail.objects.filter(user__in=user_pks)]
filters.append(Filter(Email, "email", set(emails)))

def filter_objects(queryset_iterator):
# Generator that re-yields only those items of `queryset_iterator` that pass both the explicit
# `filters` and the transitive foreign key check below. `filters`, `deps`, and `pk_map` are
# read from the enclosing `_export()` scope.
#
# Intercept each value from the queryset iterator and ensure that all of its dependencies
# have already been exported. If they have, store it in the `pk_map`, and then yield it
# again. If they have not, we know that some upstream model was filtered out, so we ignore
# this one as well.
#
# NOTE(review): this presumably relies on models being iterated in dependency order, so that
# a referenced row is always seen before the rows pointing at it - confirm against the
# caller. Indentation is not visible in this view; the two `else:` clauses below are read as
# `for ... else` (they run only when the matching loop finishes without hitting `break`).
for item in queryset_iterator:
model = type(item)
model_name = normalize_model_name(model)

# Make sure this model is not explicitly being filtered.
for f in filters:
# A filter only applies to items of its own model; the item is rejected when the value of
# the filtered field is not one of the allowed values.
if f.model == model and getattr(item, f.field, None) not in f.values:
break
else:
# Now make sure it's not transitively filtered either.
for field, foreign_field in deps[model_name].foreign_keys.items():
dependency_model_name = normalize_model_name(foreign_field.model)
# Read the `<field>_id` attribute (appending `_id` unless the name already ends with it),
# presumably the raw foreign key value rather than the related object - TODO confirm.
field_id = field if field.endswith("_id") else f"{field}_id"
fk = getattr(item, field_id, None)
if fk is None:
# Null deps are allowed.
continue
if pk_map.get(dependency_model_name, fk) is None:
# The foreign key value exists, but not found! An upstream model must have
# been filtered out, so we can filter this one out as well.
break
else:
# Every non-null foreign key resolved: register this item's pk (passed as both the second and
# third argument to `pk_map.insert`) so that later items referencing it pass the check above.
pk_map.insert(model_name, item.pk, item.pk)
yield item

def yield_objects():
# Collate the objects to be serialized.
Expand Down Expand Up @@ -96,7 +170,7 @@ def yield_objects():
continue

queryset = model._base_manager.order_by(model._meta.pk.name)
yield from queryset.iterator()
yield from filter_objects(queryset.iterator())

printer(">> Beginning export", err=True)
serialize(
Expand All @@ -109,23 +183,43 @@ def yield_objects():
)


def export_in_user_scope(src, printer=click.echo):
def export_in_user_scope(src, *, user_filter: set[str] | None = None, printer=click.echo):
"""
Perform an export in the `User` scope, meaning that only models with `RelocationScope.User` will be exported from the provided `src` file.
"""
return _export(src, ExportScope.User, OldExportConfig(), 2, printer)

# Import here to prevent circular module resolutions.
from sentry.models.user import User

return _export(
src,
ExportScope.User,
OldExportConfig(),
filter_by=Filter(User, "username", user_filter) if user_filter is not None else None,
printer=printer,
)


def export_in_organization_scope(src, printer=click.echo):
def export_in_organization_scope(src, *, org_filter: set[str] | None = None, printer=click.echo):
"""
Perform an export in the `Organization` scope, meaning that only models with `RelocationScope.User` or `RelocationScope.Organization` will be exported from the provided `src` file.
"""
return _export(src, ExportScope.Organization, OldExportConfig(), 2, printer)

# Import here to prevent circular module resolutions.
from sentry.models.organization import Organization

return _export(
src,
ExportScope.Organization,
OldExportConfig(),
filter_by=Filter(Organization, "slug", org_filter) if org_filter is not None else None,
printer=printer,
)


def export_in_global_scope(src, printer=click.echo):
def export_in_global_scope(src, *, printer=click.echo):
"""
Perform an export in the `Global` scope, meaning that all models will be exported from the
provided source file.
"""
return _export(src, ExportScope.Global, OldExportConfig(), 2, printer)
return _export(src, ExportScope.Global, OldExportConfig(), printer=printer)
4 changes: 2 additions & 2 deletions src/sentry/runner/commands/backup.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ def export(dest, silent, indent, exclude):
excluded_models=set(exclude),
use_natural_foreign_keys=True,
),
indent,
(lambda *args, **kwargs: None) if silent else click.echo,
indent=indent,
printer=(lambda *args, **kwargs: None) if silent else click.echo,
)
6 changes: 3 additions & 3 deletions src/sentry/testutils/helpers/backups.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(self, info: ComparatorFindings):
self.info = info


def export_to_file(path: Path, scope: ExportScope) -> JSONData:
def export_to_file(path: Path, scope: ExportScope, filter_by: set[str] | None = None) -> JSONData:
"""Helper function that exports the current state of the database to the specified file."""

json_file_path = str(path)
Expand All @@ -107,9 +107,9 @@ def export_to_file(path: Path, scope: ExportScope) -> JSONData:
if scope == ExportScope.Global:
export_in_global_scope(tmp_file, printer=NOOP_PRINTER)
elif scope == ExportScope.Organization:
export_in_organization_scope(tmp_file, printer=NOOP_PRINTER)
export_in_organization_scope(tmp_file, org_filter=filter_by, printer=NOOP_PRINTER)
elif scope == ExportScope.User:
export_in_user_scope(tmp_file, printer=NOOP_PRINTER)
export_in_user_scope(tmp_file, user_filter=filter_by, printer=NOOP_PRINTER)
else:
raise AssertionError(f"Unknown `ExportScope`: `{scope.name}`")

Expand Down
Loading

0 comments on commit 7943bf8

Please sign in to comment.