Skip to content

Commit

Permalink
Fix bug causing missing datamap rows (#1124)
Browse files Browse the repository at this point in the history
* Update export.py

* fix mistakenly dropped datamap rows

* add tests for the bug that fail against the original code

* fix test failure with grouping in delete_df

* fix mypy reassignment issues, changelog

Co-authored-by: SteveDMurphy <[email protected]>
  • Loading branch information
ThomasLaPiana and SteveDMurphy authored Sep 29, 2022
1 parent adc0b34 commit 6c723e6
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ The types of changes are:
### Fixed

* Fixed the "help" link in the UI header [#1078](https://github.com/ethyca/fides/pull/1078)
* Fixed a bug where rows were being excluded from a data map [#1124](https://github.com/ethyca/fides/pull/1124)

### Security

Expand Down
21 changes: 17 additions & 4 deletions src/fidesctl/ctl/core/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,10 +389,22 @@ def build_joined_dataframe(
# restructure the joined dataframe to represent system and dataset data categories appropriately
joined_df = union_data_categories_in_joined_dataframe(joined_df)

delete_df = joined_df[["system.name", "unioned_data_categories"]][
joined_df.groupby(["system.name", "unioned_data_categories"])[
"dataset.name"
].transform("count")
delete_df = joined_df[
[
"system.name",
"system.privacy_declaration.data_use.name",
"system.privacy_declaration.data_subjects.name",
"unioned_data_categories",
]
][
joined_df.groupby(
[
"system.name",
"system.privacy_declaration.data_use.name",
"system.privacy_declaration.data_subjects.name",
"unioned_data_categories",
]
)["dataset.name"].transform("count")
> 1
].drop_duplicates()

Expand Down Expand Up @@ -438,6 +450,7 @@ def export_datamap(
get_server_resource(url, "organization", organization_fides_key, headers)
]
}
# Verify this isn't dropping records on the joins
for resource_type in ["system", "dataset", "data_subject", "data_use"]:
server_resources = list_server_resources(
url,
Expand Down
108 changes: 105 additions & 3 deletions tests/ctl/core/test_export.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# pylint: disable=missing-docstring, redefined-outer-name
from typing import Generator
from typing import Dict, Generator

import pytest
from fideslang.models import (
Expand All @@ -18,7 +18,7 @@


@pytest.fixture()
def test_sample_system_taxonomy() -> Generator:
def test_sample_system_taxonomy() -> Generator[Dict, None, None]:
yield {
"system": [
System(
Expand All @@ -36,7 +36,7 @@ def test_sample_system_taxonomy() -> Generator:
data_use="provide.service",
data_qualifier="aggregated.anonymized",
data_subjects=["customer"],
dataset_references=["users_dataset"],
dataset_references=["test_dataset"],
)
],
)
Expand All @@ -45,6 +45,12 @@ def test_sample_system_taxonomy() -> Generator:
"data_use": [
DataUse(fides_key="provide.service", name="System", parent_key="provide")
],
"organization": [
Organization(
fides_key="default_organization",
security_policy="https://www.google.com/",
)
],
}


Expand Down Expand Up @@ -125,3 +131,99 @@ def test_organization_records_to_export() -> None:
[Organization(fides_key="default_organization")]
)
assert len(output_list) == 5


@pytest.mark.unit
def test_joined_datamap_export_system_only(
    test_sample_system_taxonomy: Dict,
    test_config: FidesctlConfig,
) -> None:
    """
    Checks the joined export length for the sample system with no datasets.

    The sample taxonomy fixture is given an empty ``dataset`` list before
    being passed to ``export.build_joined_dataframe``; the result is
    expected to have a length of 2.
    """
    # The fixture dict is updated in place rather than copied.
    test_sample_system_taxonomy["dataset"] = []
    joined_rows = export.build_joined_dataframe(test_sample_system_taxonomy)
    assert len(joined_rows) == 2


@pytest.mark.unit
def test_joined_datamap_export_system_dataset_overlap(
    test_sample_system_taxonomy: Dict,
    test_sample_dataset_taxonomy: Generator,
    test_config: FidesctlConfig,
) -> None:
    """
    Checks the joined export length for the sample system plus a dataset.

    The value provided by the ``test_sample_dataset_taxonomy`` fixture is
    stored under the ``dataset`` key of the sample taxonomy, and the joined
    output is expected to have a length of 5.
    """
    # NOTE(review): the dataset fixture is annotated as ``Generator``, but
    # pytest presumably injects the fixture's yielded value here - confirm.
    taxonomy: Dict = test_sample_system_taxonomy
    taxonomy["dataset"] = test_sample_dataset_taxonomy
    joined_rows = export.build_joined_dataframe(taxonomy)
    assert len(joined_rows) == 5


@pytest.mark.unit
def test_joined_datamap_export_system_dataset_common(
    test_sample_system_taxonomy: Dict,
    test_config: FidesctlConfig,
) -> None:
    """
    Checks that duplicate rows are removed from the joined export.

    A single ``test_dataset`` dataset with one collection and two fields
    (email and name categories) is attached to the sample taxonomy; the
    joined output is expected to have a length of 2.
    """
    email_field = DatasetField(
        name="test_field_1",
        data_categories=["user.contact.email"],
        data_qualifier="aggregated.anonymized",
        retention="No retention policy",
    )
    name_field = DatasetField(
        name="test_field_2",
        data_categories=["user.contact.name"],
        data_qualifier="aggregated.anonymized",
    )
    collection = DatasetCollection(
        name="test_collection",
        data_categories=[],
        fields=[email_field, name_field],
    )
    dataset = Dataset(
        fides_key="test_dataset",
        name="test dataset",
        description="dataset for testing",
        dataset_categories=[],
        collections=[collection],
    )

    taxonomy: Dict = test_sample_system_taxonomy
    taxonomy["dataset"] = [dataset]
    joined_rows = export.build_joined_dataframe(taxonomy)
    assert len(joined_rows) == 2


@pytest.mark.unit
def test_joined_datamap_export_system_multiple_declarations_overlap(
    test_sample_system_taxonomy: Dict,
    test_config: FidesctlConfig,
) -> None:
    """
    Checks the joined export length when the system has two declarations.

    A ``prospect`` data subject and a second privacy declaration that
    references it are added to the sample taxonomy, with no datasets; the
    joined output is expected to have a length of 4.
    """
    prospect_subject = DataSubject(fides_key="prospect", name="prospect")
    prospect_declaration = PrivacyDeclaration(
        name="privacy_declaration_2",
        data_categories=["user.contact.email", "user.contact.name"],
        data_use="provide.service",
        data_qualifier="aggregated.anonymized",
        data_subjects=["prospect"],
    )

    taxonomy: Dict = test_sample_system_taxonomy
    taxonomy["data_subject"].append(prospect_subject)
    # The extra declaration is attached to the first system in the fixture.
    first_system = taxonomy["system"][0]
    first_system.privacy_declarations.append(prospect_declaration)
    taxonomy["dataset"] = []

    joined_rows = export.build_joined_dataframe(taxonomy)
    assert len(joined_rows) == 4

0 comments on commit 6c723e6

Please sign in to comment.