Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

[#743] Store provided identity data in application database #834

Merged
merged 23 commits into from
Jul 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ The types of changes are:
* Adds SaaS connection type to SaaS yaml config [748](https://github.com/ethyca/fidesops/pull/748)
* Adds endpoint to get available connectors (database and saas) [#768](https://github.com/ethyca/fidesops/pull/768)
* Adds endpoint to get the secrets required for different connectors [#795](https://github.com/ethyca/fidesops/pull/795)
* Store provided identity data in the privacy request table [#743](https://github.com/ethyca/fidesops/pull/834)

### Developer Experience
* Replace user authentication routes with fideslib routes [#811](https://github.com/ethyca/fidesops/pull/811)
Expand Down
10 changes: 9 additions & 1 deletion create_test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from fidesops.models.policy import ActionType, Policy, Rule, RuleTarget
from fidesops.models.privacy_request import PrivacyRequest, PrivacyRequestStatus
from fidesops.models.storage import ResponseFormat, StorageConfig
from fidesops.schemas.redis_cache import PrivacyRequestIdentity
from fidesops.schemas.storage.storage import FileNaming, StorageDetails, StorageType
from fidesops.util.data_category import DataCategory

Expand Down Expand Up @@ -176,7 +177,7 @@ def create_test_data(db: orm.Session) -> FidesUser:

for policy in policies:
for status in PrivacyRequestStatus.__members__.values():
PrivacyRequest.create(
pr = PrivacyRequest.create(
db=db,
data={
"external_id": f"ext-{uuid4()}",
Expand All @@ -188,6 +189,13 @@ def create_test_data(db: orm.Session) -> FidesUser:
"client_id": policy.client_id,
},
)
pr.persist_identity(
db=db,
identity=PrivacyRequestIdentity(
email="[email protected]",
phone_number="+1 234 567 8910",
),
)

print("Adding connection configs")
_create_connection_configs(db)
Expand Down
17 changes: 11 additions & 6 deletions src/fidesops/api/v1/endpoints/drp_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,12 +89,6 @@ def create_drp_privacy_request(
)

try:
privacy_request: PrivacyRequest = PrivacyRequest.create(
db=db, data=privacy_request_kwargs
)

logger.info(f"Decrypting identity for DRP privacy request {privacy_request.id}")

decrypted_identity: DrpIdentity = DrpIdentity(
**jwt.decode(data.identity, jwt_key, algorithms=["HS256"])
)
Expand All @@ -103,6 +97,17 @@ def create_drp_privacy_request(
drp_identity=decrypted_identity
)

privacy_request: PrivacyRequest = PrivacyRequest.create(
db=db,
data=privacy_request_kwargs,
)
privacy_request.persist_identity(
db=db,
identity=mapped_identity,
)

logger.info(f"Decrypting identity for DRP privacy request {privacy_request.id}")

cache_data(privacy_request, policy, mapped_identity, None, data)

queue_privacy_request(privacy_request.id)
Expand Down
3 changes: 3 additions & 0 deletions src/fidesops/api/v1/endpoints/privacy_request_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,9 @@ def create_privacy_request(

try:
privacy_request: PrivacyRequest = PrivacyRequest.create(db=db, data=kwargs)
privacy_request.persist_identity(
db=db, identity=privacy_request_data.identity
)

cache_data(
privacy_request,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
"""adds provided identity table for identity storage and later identity search

Revision ID: 3c5e1253465d
Revises: fc90277bbcde
Create Date: 2022-07-08 11:53:05.215848

"""
import sqlalchemy as sa
import sqlalchemy_utils
from alembic import op

# revision identifiers, used by Alembic.
revision = "3c5e1253465d"
down_revision = "fc90277bbcde"
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``providedidentity`` table and its two non-unique indexes."""
    table_name = "providedidentity"

    def timestamp_column(column_name):
        # Nullable, timezone-aware timestamp whose server-side default is now().
        return sa.Column(
            column_name,
            sa.DateTime(timezone=True),
            server_default=sa.text("now()"),
            nullable=True,
        )

    # Columns and constraints, listed in the order they are handed to create_table.
    table_items = [
        sa.Column("id", sa.String(length=255), nullable=False),
        timestamp_column("created_at"),
        timestamp_column("updated_at"),
        sa.Column("privacy_request_id", sa.String(), nullable=False),
        sa.Column(
            "field_name",
            sa.Enum("email", "phone_number", name="providedidentitytype"),
            nullable=False,
        ),
        sa.Column("hashed_value", sa.String(), nullable=True),
        sa.Column(
            "encrypted_value",
            sqlalchemy_utils.types.encrypted.encrypted_type.StringEncryptedType(),
            nullable=True,
        ),
        # Each row references the privacy request it was provided with.
        sa.ForeignKeyConstraint(["privacy_request_id"], ["privacyrequest.id"]),
        sa.PrimaryKeyConstraint("id"),
    ]
    op.create_table(table_name, *table_items)

    # Index on hashed_value first, then on id (both non-unique).
    indexes = (
        ("ix_providedidentity_hashed_value", "hashed_value"),
        ("ix_providedidentity_id", "id"),
    )
    for index_name, indexed_column in indexes:
        op.create_index(
            op.f(index_name),
            table_name,
            [indexed_column],
            unique=False,
        )


def downgrade():
    """Drop the ``providedidentity`` indexes and then the table itself."""
    table_name = "providedidentity"

    # Indexes are removed before the table they belong to.
    index_names = (
        "ix_providedidentity_id",
        "ix_providedidentity_hashed_value",
    )
    for index_name in index_names:
        op.drop_index(op.f(index_name), table_name=table_name)

    op.drop_table(table_name)
109 changes: 106 additions & 3 deletions src/fidesops/models/privacy_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Any, Dict, List, Optional

from celery.result import AsyncResult
from fideslib.cryptography.cryptographic_util import hash_with_salt
from fideslib.db.base import Base
from fideslib.db.base_class import FidesBase
from fideslib.models.audit_log import AuditLog
Expand All @@ -17,12 +18,17 @@
from sqlalchemy import Enum as EnumColumn
from sqlalchemy import ForeignKey, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.ext.mutable import MutableList
from sqlalchemy.ext.mutable import MutableDict, MutableList
from sqlalchemy.orm import Session, backref, relationship
from sqlalchemy_utils.types.encrypted.encrypted_type import (
AesGcmEngine,
StringEncryptedType,
)

from fidesops.api.v1.scope_registry import PRIVACY_REQUEST_CALLBACK_RESUME
from fidesops.common_exceptions import PrivacyRequestPaused
from fidesops.core.config import config
from fidesops.db.base_class import JSONTypeOverride
from fidesops.graph.config import CollectionAddress
from fidesops.models.policy import (
ActionType,
Expand Down Expand Up @@ -202,13 +208,16 @@ def create(cls, db: Session, *, data: Dict[str, Any]) -> FidesBase:

def delete(self, db: Session) -> None:
"""
Clean up the cached data related to this privacy request before deleting this
object from the database
Clean up the cached and persisted data related to this privacy request before
deleting this object from the database
"""
cache: FidesopsRedis = get_cache()
all_keys = get_all_cache_keys_for_privacy_request(privacy_request_id=self.id)
for key in all_keys:
cache.delete(key)

for provided_identity in self.provided_identities:
provided_identity.delete(db=db)
super().delete(db=db)

def cache_identity(self, identity: PrivacyRequestIdentity) -> None:
Expand All @@ -222,6 +231,39 @@ def cache_identity(self, identity: PrivacyRequestIdentity) -> None:
value,
)

def persist_identity(self, db: Session, identity: PrivacyRequestIdentity) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we need to persist the identity every time we create a privacy request, can we call this method from def create(): in this same file?

This is also more in line with our pattern of deleting the identities within the def delete() method

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is possible; we currently don't handle identity data within that method at all, in favour of caching it separately from the model. The reason I left it that way was because we don't need the identity data in the PrivacyRequest table, and those ORM overrides should mainly focus on what that table needs. In the event of deletion, we do need to clear the foreign keys in order to process the deletion of PrivacyRequests.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Got it, OK we can leave as is for now, thanks!

"""
Stores the identity provided with the privacy request in a secure way, compatible with
blind indexing for later searching and audit purposes.
"""
identity_dict: Dict[str, Any] = dict(identity)
for key, value in identity_dict.items():
if value is not None:
hashed_value = ProvidedIdentity.hash_value(value)
ProvidedIdentity.create(
db=db,
data={
"privacy_request_id": self.id,
"field_name": key,
# We don't need to manually encrypt this field, it's done at the ORM level
"encrypted_value": {"value": value},
"hashed_value": hashed_value,
},
)

def get_persisted_identity(self) -> PrivacyRequestIdentity:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we have a way to get by hashed value yet?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great spot! I had intended to use this method but you're right — it wouldn't be useful for that because it generates a new salt each time without the option to refer to ProvidedIdentity.salt. Will fix 👍

Copy link
Contributor Author

@seanpreston seanpreston Jul 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The method now uses a static SALT. This way we can consistently hash values we're searching for to see if they exist in a hashed form in the table.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yay, this looks good to me now.

"""
Retrieves persisted identity fields from the DB.
"""
schema = PrivacyRequestIdentity()
for field in self.provided_identities:
setattr(
schema,
field.field_name.value,
field.encrypted_value["value"],
)
return schema

def cache_task_id(self, task_id: str) -> None:
"""Sets a task_id for this privacy request's asynchronous execution."""
cache: FidesopsRedis = get_cache()
Expand Down Expand Up @@ -493,6 +535,67 @@ def error_processing(self, db: Session) -> None:
)


class ProvidedIdentityType(EnumType):
"""Enum for privacy request identity types"""

email = "email"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are the enums for identity types supported by fidesops, so I'm thinking we should use the enums here, too: https://github.com/ethyca/fidesops/blob/main/src/fidesops/service/drp/drp_fidesops_mapper.py#L26

Something like:

DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP: Dict[str, str] = {
            "email": ProvidedIdentityType.email.value,
            "phone_number": ProvidedIdentityType.phone_number.value,
        }

phone_number = "phone_number"


class ProvidedIdentity(Base):  # pylint: disable=R0904
    """
    A table for storing identity fields and values provided at privacy request
    creation time.

    Each row holds one identity field of one privacy request: the field name,
    a salted hash of the value (``hashed_value``) and the value itself in
    encrypted form (``encrypted_value``).
    """

    privacy_request_id = Column(
        String,
        ForeignKey(PrivacyRequest.id_field_path),
        nullable=False,
    )
    privacy_request = relationship(
        PrivacyRequest,
        backref="provided_identities",
    )  # Which privacy request this identity belongs to

    field_name = Column(
        EnumColumn(ProvidedIdentityType),
        index=False,
        nullable=False,
    )
    hashed_value = Column(
        String,
        index=True,
        unique=False,
        nullable=True,
    )  # This field is used as a blind index for exact match searches
    encrypted_value = Column(
        MutableDict.as_mutable(
            StringEncryptedType(
                JSONTypeOverride,
                config.security.APP_ENCRYPTION_KEY,
                AesGcmEngine,
                "pkcs5",
            )
        ),
        nullable=True,
    )  # Type bytea in the db

    @classmethod
    def hash_value(
        cls,
        value: str,
        encoding: str = "UTF-8",
    ) -> str:
        """
        Hash an identity value with a static salt, for use as ``hashed_value``.

        The salt is the same on every call, so hashing the same value always
        produces the same result. That lets a value being searched for be
        hashed and compared for an exact match against stored rows.

        :param value: the plain identity value to hash
        :param encoding: encoding used to turn both the value and the salt into bytes
        :return: the single hashed value returned by ``hash_with_salt``
        """
        # NOTE(review): the salt is hard-coded in source. Changing it would stop
        # newly computed hashes from matching previously stored ones, so it is
        # left untouched here; consider sourcing it from configuration.
        SALT = "a-salt"
        return hash_with_salt(
            value.encode(encoding),
            SALT.encode(encoding),
        )


# Unique text to separate a step from a collection address, so we can store two values in one.
PAUSED_SEPARATOR = "__fidesops_paused_sep__"

Expand Down
14 changes: 9 additions & 5 deletions src/fidesops/service/drp/drp_fidesops_mapper.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Dict

from fidesops.models.privacy_request import PrivacyRequestStatus
from fidesops.models.privacy_request import PrivacyRequestStatus, ProvidedIdentityType
from fidesops.schemas.drp_privacy_request import DrpIdentity
from fidesops.schemas.privacy_request import PrivacyRequestDRPStatus
from fidesops.schemas.redis_cache import PrivacyRequestIdentity
Expand All @@ -22,17 +22,21 @@ def map_identity(drp_identity: DrpIdentity) -> PrivacyRequestIdentity:
This class also allows us to implement custom logic to handle "verified" id props.
"""
fidesops_identity_kwargs: Dict[str, str] = {}
DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP: Dict[str, str] = {
"email": "email",
"phone_number": "phone_number",
DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP: Dict[
str, ProvidedIdentityType
] = {
"email": ProvidedIdentityType.email,
"phone_number": ProvidedIdentityType.phone_number,
}
for attr, val in drp_identity.__dict__.items():
if attr not in DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP:
logger.warning(
f"Identity attribute of {attr} is not supported by Fidesops at this time. Continuing to use other identity props, if provided."
)
else:
fidesops_prop: str = DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP[attr]
fidesops_prop: str = DRP_TO_FIDESOPS_SUPPORTED_IDENTITY_PROPS_MAP[
attr
].value
fidesops_identity_kwargs[fidesops_prop] = val
return PrivacyRequestIdentity(**fidesops_identity_kwargs)

Expand Down
5 changes: 5 additions & 0 deletions src/fidesops/service/privacy_request/onetrust_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,12 @@ def _create_privacy_request( # pylint: disable=R0913
"client_id": onetrust_policy.client_id,
"external_id": subtask_id,
}

privacy_request: PrivacyRequest = PrivacyRequest.create(db=db, data=kwargs)
privacy_request.persist_identity(
db=db,
identity=PrivacyRequestIdentity(email=identity.email),
)
privacy_request.cache_identity(identity)
try:
queue_privacy_request(privacy_request_id=privacy_request.id)
Expand Down
14 changes: 11 additions & 3 deletions tests/api/v1/endpoints/test_drp_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,12 @@ def test_create_drp_privacy_request(
policy_drp_action,
cache,
):

identity = {"email": "[email protected]"}
TEST_EMAIL = "[email protected]"
TEST_PHONE_NUMBER = "+1 234 567 8910"
identity = {
"email": TEST_EMAIL,
"phone_number": TEST_PHONE_NUMBER,
}
encoded_identity: str = jwt.encode(
identity, config.security.DRP_JWT_SECRET, algorithm="HS256"
)
Expand Down Expand Up @@ -84,13 +88,17 @@ def test_create_drp_privacy_request(
)
assert (
cache.get(identity_key)
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20ifQ.4I8XLWnTYp8oMHjN2ypP3Hpg45DIaGNAEmj1QCYONUI"
== "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJlbWFpbCI6InRlc3RAZXhhbXBsZS5jb20iLCJwaG9uZV9udW1iZXIiOiIrMSAyMzQgNTY3IDg5MTAifQ.kHV4ru6vxQR96Meae31oKIU7mMnTJgt1cnli6GLUBFk"
)
fidesops_identity_key = get_identity_cache_key(
privacy_request_id=pr.id,
identity_attribute="email",
)
assert cache.get(fidesops_identity_key) == identity["email"]
persisted_identity = pr.get_persisted_identity()
assert persisted_identity.email == TEST_EMAIL
assert persisted_identity.phone_number == TEST_PHONE_NUMBER

pr.delete(db=db)
assert run_access_request_mock.called

Expand Down
Loading