Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Update Query Builder to use Masking Strategies on Policy Rules [#47] #31

Merged
merged 3 commits into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions data/dataset/mongo_example_test_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ dataset:
fields:
- name: _id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
- name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
Expand Down
22 changes: 22 additions & 0 deletions src/fidesops/graph/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
"""
from __future__ import annotations

from collections import defaultdict

from typing import List, Optional, Tuple, Set, Dict, Literal
from pydantic import BaseModel

Expand Down Expand Up @@ -223,6 +225,26 @@ def identities(self) -> Dict[str, Tuple[str, ...]]:
flds_w_ident = filter(lambda f: f.identity, self.fields)
return {f.name: f.identity for f in flds_w_ident}

@property
def fields_by_category(self) -> Dict[str, List]:
    """Group this collection's field names under each data category they carry.

    Inverts the per-field ``data_categories`` lists (field -> categories) into
    a category -> field names mapping. A field whose ``data_categories`` is
    empty or ``None`` contributes nothing; a field with several categories is
    listed under each of them.

    Example:
        {
            "user.provided.identifiable.contact.city": ["city"],
            "user.provided.identifiable.contact.street": ["house", "street"],
            "system.operations": ["id"],
            "user.provided.identifiable.contact.state": ["state"],
            "user.provided.identifiable.contact.postal_code": ["zip"]
        }
    """
    grouped = defaultdict(list)
    # Flatten to (category, field name) pairs first, then bucket them.
    pairs = (
        (category, field.name)
        for field in self.fields
        for category in field.data_categories or []
    )
    for category, field_name in pairs:
        grouped[category].append(field_name)
    return grouped

class Config:
"""for pydantic incorporation of custom non-pydantic types"""

Expand Down
4 changes: 1 addition & 3 deletions src/fidesops/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,7 @@ def data_category_field_mapping(self) -> Dict[str, Dict[str, List]]:
"""
mapping: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
for node_address, node in self.nodes.items():
for field in node.collection.fields:
for category in field.data_categories:
mapping[str(node_address)][category].append(field.name)
mapping[str(node_address)] = node.collection.fields_by_category
return mapping

def __repr__(self) -> str:
Expand Down
11 changes: 11 additions & 0 deletions src/fidesops/models/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from fidesops.service.masking.strategy.masking_strategy_factory import (
SupportedMaskingStrategies,
)
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE


class ActionType(EnumType):
Expand Down Expand Up @@ -100,6 +101,16 @@ def _validate_rule(
"Erasure Rules must have masking strategies."
)

# Temporary, remove when we have the pieces in place to support more than null masking.
if (
action_type == ActionType.erasure.value
and masking_strategy
and masking_strategy.get("strategy") != NULL_REWRITE
):
raise common_exceptions.RuleValidationError(
"Only the Null Masking Strategy (null_rewrite) is supported at this time."
)

if action_type == ActionType.access.value and storage_destination_id is None:
raise common_exceptions.RuleValidationError(
"Access Rules must have a storage destination."
Expand Down
86 changes: 53 additions & 33 deletions src/fidesops/service/connectors/query_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from fidesops.graph.config import ROOT_COLLECTION_ADDRESS, CollectionAddress
from fidesops.graph.traversal import TraversalNode, Row
from fidesops.models.policy import Policy
from fidesops.models.policy import Policy, ActionType, Rule
from fidesops.service.masking.strategy.masking_strategy_factory import get_strategy
from fidesops.util.collection_util import append

logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -36,29 +37,29 @@ def fields(self) -> List[str]:
"""Fields of interest from this traversal traversal_node."""
return [f.name for f in self.node.node.collection.fields]

def update_fields(self, policy: Policy) -> List[str]:
    """Names of the collection fields targeted by the policy's erasure categories.

    A field qualifies when at least one of its data categories starts with one
    of the categories returned by ``policy.get_erasure_target_categories()``.
    Fields with no data categories (``None`` or empty) never qualify.
    """
    erasure_categories = policy.get_erasure_target_categories()

    def is_targeted(field_categories: List[str]) -> bool:
        """True when some field category equals, or is prefixed by, a policy category."""
        if not field_categories:
            return False
        return any(
            field_category.startswith(policy_category)
            for policy_category in erasure_categories
            for field_category in field_categories
        )

    names = []
    for field in self.node.node.collection.fields:
        if is_targeted(field.data_categories):
            names.append(field.name)
    return names
def build_rule_target_fields(self, policy: Policy) -> Dict[Rule, List[str]]:
    """
    Return dictionary of erasure rules mapped to update-able field names on this collection.

    Rules whose action type is not erasure, and rules with no target data
    categories, are left out. A field is targeted by a rule when one of the
    field's data categories starts with one of the rule's target categories.
    Each field name appears at most once per rule, in first-match order.

    Example:
    {<fidesops.models.policy.Rule object at 0xffff9160e190>: ['name', 'code', 'ccn']}
    """
    # The category -> fields mapping belongs to the collection, not to a rule,
    # so look it up once rather than once per rule.
    collection_categories = self.node.node.collection.fields_by_category

    rule_updates: Dict[Rule, List[str]] = {}
    for rule in policy.rules:
        if rule.action_type != ActionType.erasure:
            continue
        rule_categories = rule.get_target_data_categories()
        if not rule_categories:
            continue

        # A field with several matching categories, or one matched by
        # overlapping rule categories, would otherwise be listed repeatedly.
        # Dict keys drop the repeats while keeping first-seen order.
        targeted_fields: Dict[str, None] = {}
        for rule_cat in rule_categories:
            for collection_cat, fields in collection_categories.items():
                # NOTE(review): plain string-prefix match; a sibling category
                # that merely shares a textual prefix would also match.
                if collection_cat.startswith(rule_cat):
                    targeted_fields.update(dict.fromkeys(fields))
        rule_updates[rule] = list(targeted_fields)

    return rule_updates

@property
def primary_keys(self) -> List[str]:
Expand Down Expand Up @@ -116,6 +117,28 @@ def display_query_data(self) -> Dict[str, Any]:

return data

def update_value_map(self, row: Row, policy: Policy) -> Dict[str, Any]:
    """Compute the masked replacement value for every field the policy's rules target.

    For each rule returned by ``build_rule_target_fields``, the rule's masking
    strategy is instantiated from its ``strategy`` / ``configuration`` entries
    and applied to the row's current value of each targeted field.

    Example return: {'name': None, 'ccn': None, 'code': None}

    In this example, a Null Masking Strategy was used to determine that the name/ccn/code fields
    for a given customer_id will be replaced with null values.
    """
    masked_values: Dict[str, Any] = {}
    for rule, field_names in self.build_rule_target_fields(policy).items():
        config = rule.masking_strategy
        strategy = get_strategy(config["strategy"], config["configuration"])
        # If two rules target the same field, the later rule's value wins.
        masked_values.update(
            {name: strategy.mask(row[name]) for name in field_names}
        )
    return masked_values

@abstractmethod
def generate_query(
self, input_data: Dict[str, List[Any]], policy: Optional[Policy]
Expand Down Expand Up @@ -172,16 +195,14 @@ def generate_query(
)
return None

def generate_update_stmt(
self, row: Row, policy: Optional[Policy] = None
) -> Optional[TextClause]:
"""Generate a SQL update statement in the form of a TextClause"""
update_fields = self.update_fields(policy)
update_value_map = {k: None for k in update_fields}
update_clauses = [f"{k} = :{k}" for k in update_fields]
def generate_update_stmt(self, row: Row, policy: Policy) -> Optional[TextClause]:
update_value_map = self.update_value_map(row, policy)
update_clauses = [f"{k} = :{k}" for k in update_value_map]
pk_clauses = [f"{k} = :{k}" for k in self.primary_keys]

for pk in self.primary_keys:
update_value_map[pk] = row[pk]

valid = len(pk_clauses) > 0 and len(update_clauses) > 0
if not valid:
logger.warning(
Expand Down Expand Up @@ -276,8 +297,7 @@ def generate_update_stmt(
self, row: Row, policy: Optional[Policy] = None
) -> Optional[MongoStatement]:
"""Generate a SQL update statement in the form of Mongo update statement components"""
update_fields = self.update_fields(policy)
update_clauses = {k: None for k in update_fields}
update_clauses = self.update_value_map(row, policy)
pk_clauses = {k: row[k] for k in self.primary_keys}

valid = len(pk_clauses) > 0 and len(update_clauses) > 0
Expand Down
17 changes: 7 additions & 10 deletions tests/api/v1/endpoints/test_policy_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
generate_fides_data_categories,
)
from fidesops.service.masking.strategy.masking_strategy_hash import HASH
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE


class TestGetPolicies:
Expand Down Expand Up @@ -451,18 +452,14 @@ def test_create_erasure_rule_for_policy(
generate_auth_header,
policy,
):
FORMAT_PRESERVATION_SUFFIX = "@masked.com"
HASH_ALGORITHM = "SHA-512"

data = [
{
"name": "test erasure rule",
"action_type": ActionType.erasure.value,
"masking_strategy": {
"strategy": HASH,
"configuration": {
"algorithm": HASH_ALGORITHM,
"format_preservation": {"suffix": FORMAT_PRESERVATION_SUFFIX},
},
"strategy": NULL_REWRITE,
"configuration": {},
},
}
]
Expand All @@ -479,7 +476,7 @@ def test_create_erasure_rule_for_policy(
rule_data = response_data[0]
assert "masking_strategy" in rule_data
masking_strategy_data = rule_data["masking_strategy"]
assert masking_strategy_data["strategy"] == HASH
assert masking_strategy_data["strategy"] == NULL_REWRITE
assert "configuration" not in masking_strategy_data

def test_update_rule_policy_id_fails(
Expand Down Expand Up @@ -822,8 +819,8 @@ def test_create_conflicting_rule_targets(
"name": "Erasure Rule",
"policy_id": policy.id,
"masking_strategy": {
"strategy": HASH,
"configuration": {"algorithm": "SHA-512"},
"strategy": NULL_REWRITE,
"configuration": {},
},
},
)
Expand Down
49 changes: 47 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
StorageSecrets,
StorageType,
)
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE
from fidesops.service.masking.strategy.masking_strategy_string_rewrite import STRING_REWRITE
from fidesops.util.cache import FidesopsRedis

logging.getLogger("faker").setLevel(logging.ERROR)
Expand Down Expand Up @@ -222,8 +224,8 @@ def erasure_policy(
"name": "Erasure Rule",
"policy_id": erasure_policy.id,
"masking_strategy": {
"strategy": "hash",
"configuration": {"algorithm": "SHA-512"},
"strategy": "null_rewrite",
"configuration": {},
},
},
)
Expand Down Expand Up @@ -251,6 +253,49 @@ def erasure_policy(
pass


@pytest.fixture(scope="function")
def erasure_policy_two_rules(db: Session, oauth_client: ClientDetail, erasure_policy: Policy) -> Generator:
    """Yield ``erasure_policy`` after attaching a second erasure rule and target to it.

    The second rule is created with the null-rewrite strategy and then has its
    ``masking_strategy`` attribute reassigned to a string-rewrite strategy.
    Its single target is the contact email data category. After the test, the
    target, the rule and the policy are deleted in that order; objects that
    are already deleted are ignored.
    """
    rule_data = {
        "action_type": ActionType.erasure.value,
        "client_id": oauth_client.id,
        "name": "Second Erasure Rule",
        "policy_id": erasure_policy.id,
        "masking_strategy": {"strategy": NULL_REWRITE, "configuration": {}},
    }
    second_erasure_rule = Rule.create(db=db, data=rule_data)

    # TODO set masking strategy in Rule.create() call above, once more masking strategies beyond NULL_REWRITE are supported.
    second_erasure_rule.masking_strategy = {
        "strategy": STRING_REWRITE,
        "configuration": {"rewrite_value": "*****"},
    }

    target_data = {
        "client_id": oauth_client.id,
        "data_category": DataCategory("user.provided.identifiable.contact.email").value,
        "rule_id": second_erasure_rule.id,
    }
    second_rule_target = RuleTarget.create(db=db, data=target_data)

    yield erasure_policy

    # Tear down children before parents; each delete is attempted independently.
    for record in (second_rule_target, second_erasure_rule, erasure_policy):
        try:
            record.delete(db)
        except ObjectDeletedError:
            pass


@pytest.fixture(scope="function")
def policy(
db: Session,
Expand Down
6 changes: 5 additions & 1 deletion tests/graph/graph_test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from fidesops.models.privacy_request import PrivacyRequest
from fidesops.service.connectors import BaseConnector
from fidesops.service.connectors.sql_connector import SQLConnector
from fidesops.service.masking.strategy.masking_strategy_nullify import NullMaskingStrategy
from fidesops.task.graph_task import GraphTask
from fidesops.task.task_resources import TaskResources
from ..fixtures import faker
Expand Down Expand Up @@ -63,7 +64,10 @@ def erasure_policy(*erasure_categories: str) -> Policy:
"""Generate an erasure policy with the given categories"""
policy = Policy()
targets = [RuleTarget(data_category=c) for c in erasure_categories]
policy.rules = [Rule(action_type=ActionType.erasure, targets=targets)]
policy.rules = [Rule(action_type=ActionType.erasure, targets=targets, masking_strategy={
"strategy": "null_rewrite",
"configuration": {},
})]
return policy


Expand Down
Loading