-
Notifications
You must be signed in to change notification settings - Fork 16
[#743] Store provided identity data in application database #834
Changes from all commits
188c574
18efc9f
13c0de9
37cc67e
38a6f3a
eeb5721
21bf006
cd173c9
e32326d
90c32cf
ddf0d66
bb24862
971c36b
3143de8
9ca5d97
cf6915b
184b106
d3068db
ecedcec
6a8c046
e0034ba
fbd3f10
7d86a6b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
from fidesops.models.policy import ActionType, Policy, Rule, RuleTarget | ||
from fidesops.models.privacy_request import PrivacyRequest, PrivacyRequestStatus | ||
from fidesops.models.storage import ResponseFormat, StorageConfig | ||
from fidesops.schemas.redis_cache import PrivacyRequestIdentity | ||
from fidesops.schemas.storage.storage import FileNaming, StorageDetails, StorageType | ||
from fidesops.util.data_category import DataCategory | ||
|
||
|
@@ -176,7 +177,7 @@ def create_test_data(db: orm.Session) -> FidesUser: | |
|
||
for policy in policies: | ||
for status in PrivacyRequestStatus.__members__.values(): | ||
PrivacyRequest.create( | ||
pr = PrivacyRequest.create( | ||
db=db, | ||
data={ | ||
"external_id": f"ext-{uuid4()}", | ||
|
@@ -188,6 +189,13 @@ def create_test_data(db: orm.Session) -> FidesUser: | |
"client_id": policy.client_id, | ||
}, | ||
) | ||
pr.persist_identity( | ||
db=db, | ||
identity=PrivacyRequestIdentity( | ||
email="[email protected]", | ||
phone_number="+1 234 567 8910", | ||
), | ||
) | ||
|
||
print("Adding connection configs") | ||
_create_connection_configs(db) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
"""adds provided identity table for identity storage and later identity search | ||
|
||
Revision ID: 3c5e1253465d | ||
Revises: fc90277bbcde | ||
Create Date: 2022-07-08 11:53:05.215848 | ||
|
||
""" | ||
import sqlalchemy as sa | ||
import sqlalchemy_utils | ||
from alembic import op | ||
|
||
# revision identifiers, used by Alembic. | ||
revision = "3c5e1253465d" | ||
down_revision = "fc90277bbcde" | ||
branch_labels = None | ||
depends_on = None | ||
|
||
|
||
def upgrade(): | ||
# ### commands auto generated by Alembic - please adjust! ### | ||
op.create_table( | ||
"providedidentity", | ||
sa.Column("id", sa.String(length=255), nullable=False), | ||
sa.Column( | ||
"created_at", | ||
sa.DateTime(timezone=True), | ||
server_default=sa.text("now()"), | ||
nullable=True, | ||
), | ||
sa.Column( | ||
"updated_at", | ||
sa.DateTime(timezone=True), | ||
server_default=sa.text("now()"), | ||
nullable=True, | ||
), | ||
sa.Column("privacy_request_id", sa.String(), nullable=False), | ||
sa.Column( | ||
"field_name", | ||
sa.Enum("email", "phone_number", name="providedidentitytype"), | ||
nullable=False, | ||
), | ||
sa.Column("hashed_value", sa.String(), nullable=True), | ||
sa.Column( | ||
"encrypted_value", | ||
sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(), | ||
nullable=True, | ||
), | ||
sa.ForeignKeyConstraint( | ||
["privacy_request_id"], | ||
["privacyrequest.id"], | ||
), | ||
sa.PrimaryKeyConstraint("id"), | ||
) | ||
op.create_index( | ||
op.f("ix_providedidentity_hashed_value"), | ||
"providedidentity", | ||
["hashed_value"], | ||
unique=False, | ||
) | ||
op.create_index( | ||
op.f("ix_providedidentity_id"), "providedidentity", ["id"], unique=False | ||
) | ||
# ### end Alembic commands ### | ||
|
||
|
||
def downgrade(): | ||
# ### commands auto generated by Alembic - please adjust! ### | ||
op.drop_index(op.f("ix_providedidentity_id"), table_name="providedidentity") | ||
op.drop_index( | ||
op.f("ix_providedidentity_hashed_value"), table_name="providedidentity" | ||
) | ||
op.drop_table("providedidentity") | ||
# ### end Alembic commands ### |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
from typing import Any, Dict, List, Optional | ||
|
||
from celery.result import AsyncResult | ||
from fideslib.cryptography.cryptographic_util import hash_with_salt | ||
from fideslib.db.base import Base | ||
from fideslib.db.base_class import FidesBase | ||
from fideslib.models.audit_log import AuditLog | ||
|
@@ -17,12 +18,17 @@ | |
from sqlalchemy import Enum as EnumColumn | ||
from sqlalchemy import ForeignKey, String | ||
from sqlalchemy.dialects.postgresql import JSONB | ||
from sqlalchemy.ext.mutable import MutableList | ||
from sqlalchemy.ext.mutable import MutableDict, MutableList | ||
from sqlalchemy.orm import Session, backref, relationship | ||
from sqlalchemy_utils.types.encrypted.encrypted_type import ( | ||
AesGcmEngine, | ||
StringEncryptedType, | ||
) | ||
|
||
from fidesops.api.v1.scope_registry import PRIVACY_REQUEST_CALLBACK_RESUME | ||
from fidesops.common_exceptions import PrivacyRequestPaused | ||
from fidesops.core.config import config | ||
from fidesops.db.base_class import JSONTypeOverride | ||
from fidesops.graph.config import CollectionAddress | ||
from fidesops.models.policy import ( | ||
ActionType, | ||
|
@@ -202,13 +208,16 @@ def create(cls, db: Session, *, data: Dict[str, Any]) -> FidesBase: | |
|
||
def delete(self, db: Session) -> None: | ||
""" | ||
Clean up the cached data related to this privacy request before deleting this | ||
object from the database | ||
Clean up the cached and persisted data related to this privacy request before | ||
deleting this object from the database | ||
""" | ||
cache: FidesopsRedis = get_cache() | ||
all_keys = get_all_cache_keys_for_privacy_request(privacy_request_id=self.id) | ||
for key in all_keys: | ||
cache.delete(key) | ||
|
||
for provided_identity in self.provided_identities: | ||
provided_identity.delete(db=db) | ||
super().delete(db=db) | ||
|
||
def cache_identity(self, identity: PrivacyRequestIdentity) -> None: | ||
|
@@ -222,6 +231,39 @@ def cache_identity(self, identity: PrivacyRequestIdentity) -> None: | |
value, | ||
) | ||
|
||
def persist_identity(self, db: Session, identity: PrivacyRequestIdentity) -> None: | ||
""" | ||
Stores the identity provided with the privacy request in a secure way, compatible with | ||
blind indexing for later searching and audit purposes. | ||
""" | ||
identity_dict: Dict[str, Any] = dict(identity) | ||
for key, value in identity_dict.items(): | ||
if value is not None: | ||
hashed_value = ProvidedIdentity.hash_value(value) | ||
ProvidedIdentity.create( | ||
db=db, | ||
data={ | ||
"privacy_request_id": self.id, | ||
"field_name": key, | ||
# We don't need to manually encrypt this field, it's done at the ORM level | ||
"encrypted_value": {"value": value}, | ||
"hashed_value": hashed_value, | ||
}, | ||
) | ||
|
||
def get_persisted_identity(self) -> PrivacyRequestIdentity: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we have a way to get by hashed value yet? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Great spot! I had intended to use this method but you're right — it wouldn't be useful for that because it generates a new There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The method now uses a static There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yay, this looks good to me now. |
||
""" | ||
Retrieves persisted identity fields from the DB. | ||
""" | ||
schema = PrivacyRequestIdentity() | ||
for field in self.provided_identities: | ||
setattr( | ||
schema, | ||
field.field_name.value, | ||
field.encrypted_value["value"], | ||
) | ||
return schema | ||
|
||
def cache_task_id(self, task_id: str) -> None: | ||
"""Sets a task_id for this privacy request's asynchronous execution.""" | ||
cache: FidesopsRedis = get_cache() | ||
|
@@ -493,6 +535,67 @@ def error_processing(self, db: Session) -> None: | |
) | ||
|
||
|
||
class ProvidedIdentityType(EnumType): | ||
"""Enum for privacy request identity types""" | ||
|
||
email = "email" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These are the enums for identity types supported by fidesops, so I'm thinking we should use the enums here, too: https://github.com/ethyca/fidesops/blob/main/src/fidesops/service/drp/drp_fidesops_mapper.py#L26 Something like:
|
||
phone_number = "phone_number" | ||
|
||
|
||
class ProvidedIdentity(Base): # pylint: disable=R0904 | ||
""" | ||
A table for storing identity fields and values provided at privacy request | ||
creation time. | ||
""" | ||
|
||
privacy_request_id = Column( | ||
String, | ||
ForeignKey(PrivacyRequest.id_field_path), | ||
nullable=False, | ||
) | ||
privacy_request = relationship( | ||
PrivacyRequest, | ||
backref="provided_identities", | ||
) # Which privacy request this identity belongs to | ||
|
||
field_name = Column( | ||
EnumColumn(ProvidedIdentityType), | ||
index=False, | ||
nullable=False, | ||
) | ||
hashed_value = Column( | ||
String, | ||
index=True, | ||
unique=False, | ||
nullable=True, | ||
) # This field is used as a blind index for exact match searches | ||
encrypted_value = Column( | ||
MutableDict.as_mutable( | ||
StringEncryptedType( | ||
JSONTypeOverride, | ||
config.security.APP_ENCRYPTION_KEY, | ||
AesGcmEngine, | ||
"pkcs5", | ||
) | ||
), | ||
nullable=True, | ||
) # Type bytea in the db | ||
|
||
@classmethod | ||
def hash_value( | ||
cls, | ||
value: str, | ||
encoding: str = "UTF-8", | ||
) -> tuple[str, str]: | ||
"""Utility function to hash a user's password with a generated salt""" | ||
SALT = "a-salt" | ||
hashed_value = hash_with_salt( | ||
value.encode(encoding), | ||
SALT.encode(encoding), | ||
) | ||
return hashed_value | ||
|
||
|
||
# Unique text to separate a step from a collection address, so we can store two values in one. | ||
PAUSED_SEPARATOR = "__fidesops_paused_sep__" | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,8 +43,12 @@ def test_create_drp_privacy_request( | |
policy_drp_action, | ||
cache, | ||
): | ||
|
||
identity = {"email": "[email protected]"} | ||
TEST_EMAIL = "[email protected]" | ||
TEST_PHONE_NUMBER = "+1 234 567 8910" | ||
identity = { | ||
"email": TEST_EMAIL, | ||
"phone_number": TEST_PHONE_NUMBER, | ||
} | ||
encoded_identity: str = jwt.encode( | ||
identity, config.security.DRP_JWT_SECRET, algorithm="HS256" | ||
) | ||
|
@@ -84,13 +88,17 @@ def test_create_drp_privacy_request( | |
) | ||
assert ( | ||
cache.get(identity_key) | ||
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20ifQ.4I8XLWnTYp8oMHjN2ypP3Hpg45DIaGNAEmj1QCYONUI" | ||
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20iLCJwaG9uZV9udW1iZXIiOiIrMSAyMzQgNTY3IDg5MTAifQ.kHV4ru6vxQR96Meae31oKIU7mMnTJgt1cnli6GLUBFk" | ||
) | ||
fidesops_identity_key = get_identity_cache_key( | ||
privacy_request_id=pr.id, | ||
identity_attribute="email", | ||
) | ||
assert cache.get(fidesops_identity_key) == identity["email"] | ||
persisted_identity = pr.get_persisted_identity() | ||
assert persisted_identity.email == TEST_EMAIL | ||
assert persisted_identity.phone_number == TEST_PHONE_NUMBER | ||
|
||
pr.delete(db=db) | ||
assert run_access_request_mock.called | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since we need to persist the identity every time we
create
a privacy request, can we call this method fromdef create():
in this same file?This is also more in line with our pattern of deleting the identities within the
def delete()
methodThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is possible, we currently don't handle identity data within that method at all, in favour of caching it separately from the model. The reason I left it that way was because we don't need the identity data in the
PrivacyRequest
table, and those ORM overrides should mainly focus on what that table needs. In the event of deletion, we do need to clear the foreign keys in order to process deleted ofPrivacyRequest
s.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it, OK we can leave as is for now, thanks!