Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Commit

Permalink
Update Query Builder to use Masking Strategies on Policy Rules [#47] (#…
Browse files Browse the repository at this point in the history
…31)

* First draft: when erasing data categories, apply the masking strategy defined on each rule instead of always writing null.

* Rename categories_to_fields to fields_by_category and add additional examples.

* Restrict to just null masking strategy for now, while we put some of the other pieces in place to support other strategies.
  • Loading branch information
pattisdr authored Nov 8, 2021
1 parent c7a3d00 commit acf592c
Show file tree
Hide file tree
Showing 10 changed files with 477 additions and 120 deletions.
2 changes: 2 additions & 0 deletions data/dataset/mongo_example_test_dataset.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ dataset:
fields:
- name: _id
data_categories: [system.operations]
fidesops_meta:
primary_key: True
- name: customer_id
data_categories: [user.derived.identifiable.unique_id]
fidesops_meta:
Expand Down
22 changes: 22 additions & 0 deletions src/fidesops/graph/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@
"""
from __future__ import annotations

from collections import defaultdict

from typing import List, Optional, Tuple, Set, Dict, Literal
from pydantic import BaseModel

Expand Down Expand Up @@ -223,6 +225,26 @@ def identities(self) -> Dict[str, Tuple[str, ...]]:
flds_w_ident = filter(lambda f: f.identity, self.fields)
return {f.name: f.identity for f in flds_w_ident}

@property
def fields_by_category(self) -> Dict[str, List]:
    """Group this collection's field names under each of their data categories.

    This inverts the per-field ``data_categories`` lists into a
    category -> field names mapping. Fields whose ``data_categories`` is
    empty or ``None`` contribute nothing. A field with several categories
    appears under each of them.

    Example:
        {
            "user.provided.identifiable.contact.city": ["city"],
            "user.provided.identifiable.contact.street": ["house", "street"],
            "system.operations": ["id"],
            "user.provided.identifiable.contact.state": ["state"],
            "user.provided.identifiable.contact.postal_code": ["zip"]
        }
    """
    grouped = defaultdict(list)
    # Flatten to (category, field name) pairs first, then bucket them.
    pairs = (
        (cat, fld.name)
        for fld in self.fields
        for cat in (fld.data_categories or [])
    )
    for cat, field_name in pairs:
        grouped[cat].append(field_name)
    return grouped

class Config:
"""for pydantic incorporation of custom non-pydantic types"""

Expand Down
4 changes: 1 addition & 3 deletions src/fidesops/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,9 +250,7 @@ def data_category_field_mapping(self) -> Dict[str, Dict[str, List]]:
"""
mapping: Dict[str, Dict[str, List]] = defaultdict(lambda: defaultdict(list))
for node_address, node in self.nodes.items():
for field in node.collection.fields:
for category in field.data_categories:
mapping[str(node_address)][category].append(field.name)
mapping[str(node_address)] = node.collection.fields_by_category
return mapping

def __repr__(self) -> str:
Expand Down
11 changes: 11 additions & 0 deletions src/fidesops/models/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from fidesops.service.masking.strategy.masking_strategy_factory import (
SupportedMaskingStrategies,
)
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE


class ActionType(EnumType):
Expand Down Expand Up @@ -100,6 +101,16 @@ def _validate_rule(
"Erasure Rules must have masking strategies."
)

# Temporary, remove when we have the pieces in place to support more than null masking.
if (
action_type == ActionType.erasure.value
and masking_strategy
and masking_strategy.get("strategy") != NULL_REWRITE
):
raise common_exceptions.RuleValidationError(
"Only the Null Masking Strategy (null_rewrite) is supported at this time."
)

if action_type == ActionType.access.value and storage_destination_id is None:
raise common_exceptions.RuleValidationError(
"Access Rules must have a storage destination."
Expand Down
86 changes: 53 additions & 33 deletions src/fidesops/service/connectors/query_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from fidesops.graph.config import ROOT_COLLECTION_ADDRESS, CollectionAddress
from fidesops.graph.traversal import TraversalNode, Row
from fidesops.models.policy import Policy
from fidesops.models.policy import Policy, ActionType, Rule
from fidesops.service.masking.strategy.masking_strategy_factory import get_strategy
from fidesops.util.collection_util import append

logging.basicConfig(level=logging.INFO)
Expand Down Expand Up @@ -36,29 +37,29 @@ def fields(self) -> List[str]:
"""Fields of interest from this traversal traversal_node."""
return [f.name for f in self.node.node.collection.fields]

def update_fields(self, policy: Policy) -> List[str]:
    """Return the names of this collection's fields that the policy erases.

    A field qualifies when at least one of its data categories starts with
    one of the policy's erasure target categories (plain string prefix
    match). Fields with no data categories never qualify.
    """
    erasure_targets = policy.get_erasure_target_categories()

    def is_targeted(categories: List[str]) -> bool:
        """True if any category begins with any erasure target category."""
        if not categories:
            return False
        return any(
            category.startswith(target)
            for target in erasure_targets
            for category in categories
        )

    collection_fields = self.node.node.collection.fields
    return [fld.name for fld in collection_fields if is_targeted(fld.data_categories)]
def build_rule_target_fields(self, policy: Policy) -> Dict[Rule, List[str]]:
    """
    Return dictionary of erasure rules mapped to update-able field names on this collection.

    Only rules whose action type is erasure and that have at least one target
    data category are included. A field is targeted by a rule when one of the
    field's data categories starts with one of the rule's target categories.
    Each field name appears at most once per rule, in first-match order.

    Example:
    {<fidesops.models.policy.Rule object at 0xffff9160e190>: ['name', 'code', 'ccn']}
    """
    # The category -> fields map does not depend on the rule, so read the
    # property once rather than once per erasure rule.
    collection_categories = self.node.node.collection.fields_by_category

    rule_updates: Dict[Rule, List[str]] = {}
    for rule in policy.rules:
        if rule.action_type != ActionType.erasure:
            continue
        rule_categories = rule.get_target_data_categories()
        if not rule_categories:
            continue

        # A field can match several times (multiple categories on the field,
        # or overlapping rule categories). Dict keys keep insertion order, so
        # this de-duplicates while preserving first-match order.
        targeted_fields: Dict[str, None] = {}
        for rule_cat in rule_categories:
            for collection_cat, fields in collection_categories.items():
                # NOTE(review): plain string prefix match, so a rule category
                # also matches a sibling sharing the prefix without a "."
                # boundary - confirm the taxonomy never has such keys.
                if collection_cat.startswith(rule_cat):
                    targeted_fields.update(dict.fromkeys(fields))
        rule_updates[rule] = list(targeted_fields)

    return rule_updates

@property
def primary_keys(self) -> List[str]:
Expand Down Expand Up @@ -116,6 +117,28 @@ def display_query_data(self) -> Dict[str, Any]:

return data

def update_value_map(self, row: Row, policy: Policy) -> Dict[str, Any]:
    """Build the field -> masked value mapping to write back for one row.

    For every rule returned by ``build_rule_target_fields``, the rule's
    masking strategy is instantiated from its ``strategy`` and
    ``configuration`` entries and applied to the row's current value of each
    targeted field. If more than one rule targets a field, the value from
    the rule processed last is kept.

    Example return: {'name': None, 'ccn': None, 'code': None}
    Here a Null Masking Strategy replaced the name/ccn/code values with null.
    """
    masked_values: Dict[str, Any] = {}
    for rule, targets in self.build_rule_target_fields(policy).items():
        config = rule.masking_strategy
        masker = get_strategy(config["strategy"], config["configuration"])
        masked_values.update({name: masker.mask(row[name]) for name in targets})
    return masked_values

@abstractmethod
def generate_query(
self, input_data: Dict[str, List[Any]], policy: Optional[Policy]
Expand Down Expand Up @@ -172,16 +195,14 @@ def generate_query(
)
return None

def generate_update_stmt(
self, row: Row, policy: Optional[Policy] = None
) -> Optional[TextClause]:
"""Generate a SQL update statement in the form of a TextClause"""
update_fields = self.update_fields(policy)
update_value_map = {k: None for k in update_fields}
update_clauses = [f"{k} = :{k}" for k in update_fields]
def generate_update_stmt(self, row: Row, policy: Policy) -> Optional[TextClause]:
update_value_map = self.update_value_map(row, policy)
update_clauses = [f"{k} = :{k}" for k in update_value_map]
pk_clauses = [f"{k} = :{k}" for k in self.primary_keys]

for pk in self.primary_keys:
update_value_map[pk] = row[pk]

valid = len(pk_clauses) > 0 and len(update_clauses) > 0
if not valid:
logger.warning(
Expand Down Expand Up @@ -276,8 +297,7 @@ def generate_update_stmt(
self, row: Row, policy: Optional[Policy] = None
) -> Optional[MongoStatement]:
"""Generate a SQL update statement in the form of Mongo update statement components"""
update_fields = self.update_fields(policy)
update_clauses = {k: None for k in update_fields}
update_clauses = self.update_value_map(row, policy)
pk_clauses = {k: row[k] for k in self.primary_keys}

valid = len(pk_clauses) > 0 and len(update_clauses) > 0
Expand Down
17 changes: 7 additions & 10 deletions tests/api/v1/endpoints/test_policy_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
generate_fides_data_categories,
)
from fidesops.service.masking.strategy.masking_strategy_hash import HASH
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE


class TestGetPolicies:
Expand Down Expand Up @@ -451,18 +452,14 @@ def test_create_erasure_rule_for_policy(
generate_auth_header,
policy,
):
FORMAT_PRESERVATION_SUFFIX = "@masked.com"
HASH_ALGORITHM = "SHA-512"

data = [
{
"name": "test erasure rule",
"action_type": ActionType.erasure.value,
"masking_strategy": {
"strategy": HASH,
"configuration": {
"algorithm": HASH_ALGORITHM,
"format_preservation": {"suffix": FORMAT_PRESERVATION_SUFFIX},
},
"strategy": NULL_REWRITE,
"configuration": {},
},
}
]
Expand All @@ -479,7 +476,7 @@ def test_create_erasure_rule_for_policy(
rule_data = response_data[0]
assert "masking_strategy" in rule_data
masking_strategy_data = rule_data["masking_strategy"]
assert masking_strategy_data["strategy"] == HASH
assert masking_strategy_data["strategy"] == NULL_REWRITE
assert "configuration" not in masking_strategy_data

def test_update_rule_policy_id_fails(
Expand Down Expand Up @@ -822,8 +819,8 @@ def test_create_conflicting_rule_targets(
"name": "Erasure Rule",
"policy_id": policy.id,
"masking_strategy": {
"strategy": HASH,
"configuration": {"algorithm": "SHA-512"},
"strategy": NULL_REWRITE,
"configuration": {},
},
},
)
Expand Down
49 changes: 47 additions & 2 deletions tests/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
StorageSecrets,
StorageType,
)
from fidesops.service.masking.strategy.masking_strategy_nullify import NULL_REWRITE
from fidesops.service.masking.strategy.masking_strategy_string_rewrite import STRING_REWRITE
from fidesops.util.cache import FidesopsRedis

logging.getLogger("faker").setLevel(logging.ERROR)
Expand Down Expand Up @@ -222,8 +224,8 @@ def erasure_policy(
"name": "Erasure Rule",
"policy_id": erasure_policy.id,
"masking_strategy": {
"strategy": "hash",
"configuration": {"algorithm": "SHA-512"},
"strategy": "null_rewrite",
"configuration": {},
},
},
)
Expand Down Expand Up @@ -251,6 +253,49 @@ def erasure_policy(
pass


@pytest.fixture(scope="function")
def erasure_policy_two_rules(
    db: Session, oauth_client: ClientDetail, erasure_policy: Policy
) -> Generator:
    """Yield the ``erasure_policy`` fixture with a second erasure rule attached.

    The second rule is created with the null masking strategy and targets the
    ``user.provided.identifiable.contact.email`` data category. Its
    ``masking_strategy`` attribute is then reassigned to a string-rewrite
    configuration; this fixture does not explicitly save that reassignment.
    On teardown the target, the rule and the policy are deleted in that
    order, ignoring objects that are already gone.
    """
    rule_data = {
        "action_type": ActionType.erasure.value,
        "client_id": oauth_client.id,
        "name": "Second Erasure Rule",
        "policy_id": erasure_policy.id,
        "masking_strategy": {"strategy": NULL_REWRITE, "configuration": {}},
    }
    second_erasure_rule = Rule.create(db=db, data=rule_data)

    # TODO set masking strategy in Rule.create() call above, once more masking strategies beyond NULL_REWRITE are supported.
    second_erasure_rule.masking_strategy = {
        "strategy": STRING_REWRITE,
        "configuration": {"rewrite_value": "*****"},
    }

    target_data = {
        "client_id": oauth_client.id,
        "data_category": DataCategory(
            "user.provided.identifiable.contact.email"
        ).value,
        "rule_id": second_erasure_rule.id,
    }
    second_rule_target = RuleTarget.create(db=db, data=target_data)

    yield erasure_policy

    # Remove children before parents; anything already deleted is skipped.
    for created in (second_rule_target, second_erasure_rule, erasure_policy):
        try:
            created.delete(db)
        except ObjectDeletedError:
            pass


@pytest.fixture(scope="function")
def policy(
db: Session,
Expand Down
6 changes: 5 additions & 1 deletion tests/graph/graph_test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from fidesops.models.privacy_request import PrivacyRequest
from fidesops.service.connectors import BaseConnector
from fidesops.service.connectors.sql_connector import SQLConnector
from fidesops.service.masking.strategy.masking_strategy_nullify import NullMaskingStrategy
from fidesops.task.graph_task import GraphTask
from fidesops.task.task_resources import TaskResources
from ..fixtures import faker
Expand Down Expand Up @@ -63,7 +64,10 @@ def erasure_policy(*erasure_categories: str) -> Policy:
"""Generate an erasure policy with the given categories"""
policy = Policy()
targets = [RuleTarget(data_category=c) for c in erasure_categories]
policy.rules = [Rule(action_type=ActionType.erasure, targets=targets)]
policy.rules = [Rule(action_type=ActionType.erasure, targets=targets, masking_strategy={
"strategy": "null_rewrite",
"configuration": {},
})]
return policy


Expand Down
Loading

0 comments on commit acf592c

Please sign in to comment.