From b09e5b3773ede12eed14aeb0622e96fc6830a4ba Mon Sep 17 00:00:00 2001 From: Alex Zaslavsky Date: Fri, 21 Jul 2023 10:10:06 -0700 Subject: [PATCH] ref(backup): Do deepcopy before validation (#53346) Previously, all validation mutated the inputs in-place. This was performant (no need to make deep copies of two potentially large JSON blobs), but potentially bug prone if not held carefully, since the validation itself mutated the inputs via JSONScrubbingComparators. This means that the post-comparison input objects MUST NOT be re-used, since they are meaningfully different from the inputs that were declared valid. Instead, we now do deepcopies under the hood, leaving the originally passed-in inputs untouched. Issue: getsentry/team-ospo#156 --- src/sentry/runner/commands/backup.py | 13 +++++++++++-- tests/sentry/backup/test_correctness.py | 10 ++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/src/sentry/runner/commands/backup.py b/src/sentry/runner/commands/backup.py index 510d0efeceddfc..309f2741bbaeba 100644 --- a/src/sentry/runner/commands/backup.py +++ b/src/sentry/runner/commands/backup.py @@ -1,6 +1,7 @@ from __future__ import annotations from abc import ABC, abstractmethod +from copy import deepcopy from datetime import datetime, timedelta, timezone from difflib import unified_diff from io import StringIO @@ -159,10 +160,13 @@ def compare(self, on: InstanceID, left: JSONData, right: JSONData) -> Comparator def validate( - expect: JSONData, actual: JSONData, comparators: ComparatorMap = DEFAULT_COMPARATORS + expect: JSONData, + actual: JSONData, + comparators: ComparatorMap = DEFAULT_COMPARATORS, ) -> ComparatorFindings: """Ensures that originally imported data correctly matches actual outputted data, and produces a - list of reasons why not when it doesn't""" + list of reasons why not when it doesn't. + """ def json_lines(obj: JSONData) -> list[str]: """Take a JSONData object and pretty-print it as JSON.""" @@ -176,6 +180,11 @@ def json_lines(obj: JSONData) -> list[str]: id = InstanceID(model["model"], model["pk"]) exp_models[id] = model + # Because we may be scrubbing data from the objects as we compare them, we may (optionally) make + # deep copies to start to avoid potentially mangling the input data. + expect = deepcopy(expect) + actual = deepcopy(actual) + # Ensure that the actual JSON contains no duplicates - we assume that the expected JSON did not. for model in actual: id = InstanceID(model["model"], model["pk"]) diff --git a/tests/sentry/backup/test_correctness.py b/tests/sentry/backup/test_correctness.py index b047ade1dbbfbe..403b21fe049084 100644 --- a/tests/sentry/backup/test_correctness.py +++ b/tests/sentry/backup/test_correctness.py @@ -20,7 +20,9 @@ def import_export_then_validate( - tmp_path: Path, fixture_file_name: str, map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING + tmp_path: Path, + fixture_file_name: str, + map: ComparatorMap = EMPTY_COMPARATORS_FOR_TESTING, ) -> None: """Test helper that validates that data imported from a fixture `.json` file correctly matches the actual outputted export data.""" @@ -34,11 +36,7 @@ def import_export_then_validate( rv = CliRunner().invoke(import_, [str(fixture_file_path)]) assert rv.exit_code == 0, rv.output - res = validate( - expect, - tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")), - map, - ) + res = validate(expect, tmp_export_to_file(tmp_path.joinpath("tmp_test_file.json")), map) if res.findings: raise ValidationError(res)