diff --git a/data/dataset/snowflake_example_test_dataset.yml b/data/dataset/snowflake_example_test_dataset.yml new file mode 100644 index 000000000..4860a017a --- /dev/null +++ b/data/dataset/snowflake_example_test_dataset.yml @@ -0,0 +1,229 @@ +dataset: + - fides_key: snowflake_example_test_dataset + name: Snowflake Example Test Dataset + description: Example of a Snowflake dataset containing a variety of related tables like customers, products, addresses, etc. + collections: + - name: address + fields: + - name: city + data_categories: [user.provided.identifiable.contact.city] + - name: house + data_categories: [user.provided.identifiable.contact.street] + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: state + data_categories: [user.provided.identifiable.contact.state] + - name: street + data_categories: [user.provided.identifiable.contact.street] + - name: zip + data_categories: [user.provided.identifiable.contact.postal_code] + + - name: customer + fields: + - name: address_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: address.id + direction: to + - name: created + data_categories: [system.operations] + - name: email + data_categories: [user.provided.identifiable.contact.email] + fidesops_meta: + identity: email + data_type: string + - name: id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + primary_key: True + - name: name + data_categories: [user.provided.identifiable.name] + fidesops_meta: + data_type: string + length: 40 + - name: variant_eg + # We use this data category so we can target this column from + # our Snowflake tests + data_categories: [user.provided.identifiable.name] + + - name: employee + fields: + - name: address_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: address.id + direction: to + - name: email + 
data_categories: [user.provided.identifiable.contact.email] + fidesops_meta: + identity: email + data_type: string + - name: id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + primary_key: True + - name: name + data_categories: [user.provided.identifiable.name] + fidesops_meta: + data_type: string + + - name: login + fields: + - name: customer_id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: customer.id + direction: from + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: time + data_categories: [user.derived.nonidentifiable.sensor] + + - name: order + fields: + - name: customer_id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: customer.id + direction: from + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: shipping_address_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: address.id + direction: to + + # order_item + - name: order_item + fields: + - name: order_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: order.id + direction: from + - name: product_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: product.id + direction: to + - name: quantity + data_categories: [system.operations] + + - name: payment_card + fields: + - name: billing_address_id + data_categories: [system.operations] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: address.id + direction: to + - name: ccn + data_categories: [user.provided.identifiable.financial.account_number] + - name: code + data_categories: 
[user.provided.identifiable.financial] + - name: customer_id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: customer.id + direction: from + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: name + data_categories: [user.provided.identifiable.financial] + - name: preferred + data_categories: [user.provided.nonidentifiable] + + - name: product + fields: + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: name + data_categories: [system.operations] + - name: price + data_categories: [system.operations] + + - name: report + fields: + - name: email + data_categories: [user.provided.identifiable.contact.email] + fidesops_meta: + identity: email + data_type: string + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: month + data_categories: [system.operations] + - name: name + data_categories: [system.operations] + - name: total_visits + data_categories: [system.operations] + - name: year + data_categories: [system.operations] + + - name: service_request + fields: + - name: alt_email + data_categories: [user.provided.identifiable.contact.email] + fidesops_meta: + identity: email + data_type: string + - name: closed + data_categories: [system.operations] + - name: email + data_categories: [system.operations] + fidesops_meta: + identity: email + data_type: string + - name: employee_id + data_categories: [user.derived.identifiable.unique_id] + fidesops_meta: + references: + - dataset: snowflake_example_test_dataset + field: employee.id + direction: from + - name: id + data_categories: [system.operations] + fidesops_meta: + primary_key: True + - name: opened + data_categories: [system.operations] + + - name: visit + fields: + - name: email + data_categories: [user.provided.identifiable.contact.email] + fidesops_meta: + identity: email + 
data_type: string + - name: last_visit + data_categories: [system.operations] diff --git a/src/fidesops/core/config.py b/src/fidesops/core/config.py index fe85853e0..4298eee83 100644 --- a/src/fidesops/core/config.py +++ b/src/fidesops/core/config.py @@ -174,6 +174,8 @@ class FidesopsConfig(FidesSettings): security: SecuritySettings execution: ExecutionSettings + is_test_mode: bool = os.getenv("TESTING") == "True" + class Config: # pylint: disable=C0115 case_sensitive = True diff --git a/src/fidesops/db/session.py b/src/fidesops/db/session.py index 8d0d62eed..eaf39ccbb 100644 --- a/src/fidesops/db/session.py +++ b/src/fidesops/db/session.py @@ -1,5 +1,4 @@ import logging -import os from typing import Optional from sqlalchemy import create_engine @@ -20,7 +19,7 @@ def get_db_engine(database_uri: Optional[str] = None) -> Engine: """ if database_uri is None: # Don't override any database_uri explicity passed in - if os.getenv("TESTING"): + if config.is_test_mode: database_uri = config.database.SQLALCHEMY_TEST_DATABASE_URI else: database_uri = config.database.SQLALCHEMY_DATABASE_URI diff --git a/src/fidesops/models/connectionconfig.py b/src/fidesops/models/connectionconfig.py index df628bfbc..75e0a673c 100644 --- a/src/fidesops/models/connectionconfig.py +++ b/src/fidesops/models/connectionconfig.py @@ -1,5 +1,6 @@ import enum from datetime import datetime +from typing import Optional from sqlalchemy import ( Column, @@ -18,7 +19,10 @@ from fidesops.core.config import config -from fidesops.db.base_class import Base, JSONTypeOverride +from fidesops.db.base_class import ( + Base, + JSONTypeOverride, +) class TestStatus(enum.Enum): @@ -88,3 +92,10 @@ def update_test_status(self, test_status: TestStatus, db: Session) -> None: self.last_test_timestamp = datetime.now() self.last_test_succeeded = test_status == TestStatus.succeeded self.save(db) + + def delete(self, db: Session) -> Optional[Base]: + """Hard deletes datastores that map this ConnectionConfig.""" + for dataset 
in self.datasets: + dataset.delete(db=db) + + return super().delete(db=db) diff --git a/src/fidesops/service/connectors/base_connector.py b/src/fidesops/service/connectors/base_connector.py index b939729fd..d8e6aeaaf 100644 --- a/src/fidesops/service/connectors/base_connector.py +++ b/src/fidesops/service/connectors/base_connector.py @@ -2,6 +2,7 @@ from abc import abstractmethod, ABC from typing import Any, Dict, List, Optional, TypeVar, Generic +from fidesops.core.config import config from fidesops.graph.traversal import Row, TraversalNode from fidesops.models.connectionconfig import ConnectionConfig, TestStatus from fidesops.models.policy import Policy @@ -29,6 +30,11 @@ class BaseConnector(Generic[DB_CONNECTOR_TYPE], ABC): def __init__(self, configuration: ConnectionConfig): self.configuration = configuration + # If Fidesops is running in test mode, it's OK to show + # parameters inside queries for debugging purposes. By + # default we assume that Fidesops is not running in test + # mode. 
+ self.hide_parameters = not config.is_test_mode self.db_client: Optional[DB_CONNECTOR_TYPE] = None @abstractmethod diff --git a/src/fidesops/service/connectors/query_config.py b/src/fidesops/service/connectors/query_config.py index 147f15f42..d0d475f18 100644 --- a/src/fidesops/service/connectors/query_config.py +++ b/src/fidesops/service/connectors/query_config.py @@ -229,8 +229,41 @@ class SQLQueryConfig(QueryConfig[TextClause]): """Query config that translates parameters into SQL statements.""" + def format_fields_for_query( + self, + fields: List[str], + ) -> List[str]: + """Returns fields in a format they can be added into SQL queries.""" + return fields + + def format_clause_for_query( + self, + field_name: str, + operator: str, + ) -> str: + """Returns clauses in a format they can be added into SQL queries.""" + return f"{field_name} {operator} :{field_name}" + + def get_formatted_query_string( + self, + field_list: str, + clauses: List[str], + ) -> str: + """Returns an SQL query string.""" + return f"SELECT {field_list} FROM {self.node.node.collection.name} WHERE {' OR '.join(clauses)}" + + def get_formatted_update_stmt( + self, + update_clauses: List[str], + pk_clauses: List[str], + ) -> str: + """Returns a formatted SQL UPDATE statement in generic SQL dialect.""" + return f"UPDATE {self.node.address.collection} SET {','.join(update_clauses)} WHERE {' AND '.join(pk_clauses)}" + def generate_query( - self, input_data: Dict[str, List[Any]], policy: Optional[Policy] = None + self, + input_data: Dict[str, List[Any]], + policy: Optional[Policy] = None, + ) -> Optional[TextClause]: """Generate a retrieval query""" @@ -239,19 +272,21 @@ if filtered_data: clauses = [] query_data: Dict[str, Tuple[Any, ...]] = {} - field_list = ",".join(self.fields) + formatted_fields = self.format_fields_for_query(self.fields) + field_list = ",".join(formatted_fields) for 
field_name, data in filtered_data.items(): + data = set(data) if len(data) == 1: - clauses.append(f"{field_name} = :{field_name}") - query_data[field_name] = (data[0],) + clauses.append(self.format_clause_for_query(field_name, "=")) + query_data[field_name] = (data.pop(),) elif len(data) > 1: - clauses.append(f"{field_name} IN :{field_name}") - query_data[field_name] = tuple(set(data)) + clauses.append(self.format_clause_for_query(field_name, "IN")) + query_data[field_name] = tuple(data) else: # if there's no data, create no clause pass if len(clauses) > 0: - query_str = f"SELECT {field_list} FROM {self.node.node.collection.name} WHERE {' OR '.join(clauses)}" + query_str = self.get_formatted_query_string(field_list, clauses) return text(query_str).params(query_data) logger.warning( @@ -259,9 +294,14 @@ def generate_query( ) return None + def format_key_map_for_update_stmt(self, key_map: Dict[str, Any]) -> List[str]: + """Adds the appropriate formatting for update statements in this datastore.""" + return [f"{k} = :{k}" for k in key_map] + def generate_update_stmt(self, row: Row, policy: Policy) -> Optional[TextClause]: + """Returns an update statement in generic SQL dialect.""" update_value_map = self.update_value_map(row, policy) - update_clauses = [f"{k} = :{k}" for k in update_value_map] + update_clauses = self.format_key_map_for_update_stmt(update_value_map) non_empty_primary_keys = filter_nonempty_values( { f.name: f.cast(row[f.name]) @@ -269,7 +309,7 @@ def generate_update_stmt(self, row: Row, policy: Policy) -> Optional[TextClause] if f.name in row } ) - pk_clauses = [f"{k} = :{k}" for k in non_empty_primary_keys] + pk_clauses = self.format_key_map_for_update_stmt(non_empty_primary_keys) for k, v in non_empty_primary_keys.items(): update_value_map[k] = v @@ -280,7 +320,11 @@ def generate_update_stmt(self, row: Row, policy: Policy) -> Optional[TextClause] f"There is not enough data to generate a valid update statement for {self.node.address}" ) return None - 
query_str = f"UPDATE {self.node.address.collection} SET {','.join(update_clauses)} WHERE {' AND '.join(pk_clauses)}" + + query_str = self.get_formatted_update_stmt( + update_clauses, + pk_clauses, + ) logger.info("query = %s, params = %s", query_str, update_value_map) return text(query_str).params(update_value_map) @@ -301,6 +345,7 @@ def transform_param(p: Any) -> str: return query_str def dry_run_query(self) -> Optional[str]: + """Returns a text representation of the query.""" query_data = self.display_query_data() text_clause = self.generate_query(query_data, None) if text_clause is not None: @@ -308,6 +353,45 @@ def dry_run_query(self) -> Optional[str]: return None +class SnowflakeQueryConfig(SQLQueryConfig): + """Generates SQL in Snowflake's custom dialect.""" + + def format_fields_for_query( + self, + fields: List[str], + ) -> List[str]: + """Returns fields surrounded by quotation marks as required by Snowflake syntax.""" + return [f'"{field}"' for field in fields] + + def format_clause_for_query( + self, + field_name: str, + operator: str, + ) -> str: + """Returns field names in clauses surrounded by quotation marks as required by Snowflake syntax.""" + return f'"{field_name}" {operator} (:{field_name})' + + def get_formatted_query_string( + self, + field_list: str, + clauses: List[str], + ) -> str: + """Returns a query string with double quotation mark formatting as required by Snowflake syntax.""" + return f'SELECT {field_list} FROM "{self.node.node.collection.name}" WHERE {" OR ".join(clauses)}' + + def format_key_map_for_update_stmt(self, key_map: Dict[str, Any]) -> List[str]: + """Adds the appropriate formatting for update statements in this datastore.""" + return [f'"{k}" = :{k}' for k in key_map] + + def get_formatted_update_stmt( + self, + update_clauses: List[str], + pk_clauses: List[str], + ) -> str: + """Returns a parameterised update statement in Snowflake dialect.""" + return f'UPDATE "{self.node.address.collection}" SET 
{",".join(update_clauses)} WHERE {" AND ".join(pk_clauses)}' + + MongoStatement = Tuple[Dict[str, Any], Dict[str, Any]] """A mongo query is expressed in the form of 2 dicts, the first of which represents the query object(s) and the second of which represents fields to return. diff --git a/src/fidesops/service/connectors/sql_connector.py b/src/fidesops/service/connectors/sql_connector.py index c605b37bc..05f3fcee4 100644 --- a/src/fidesops/service/connectors/sql_connector.py +++ b/src/fidesops/service/connectors/sql_connector.py @@ -20,7 +20,10 @@ MySQLSchema, ) from fidesops.service.connectors.base_connector import BaseConnector -from fidesops.service.connectors.query_config import SQLQueryConfig +from fidesops.service.connectors.query_config import ( + SnowflakeQueryConfig, + SQLQueryConfig, +) logger = logging.getLogger(__name__) @@ -125,7 +128,11 @@ def create_client(self) -> Engine: """Returns a SQLAlchemy Engine that can be used to interact with a PostgreSQL database""" config = PostgreSQLSchema(**self.configuration.secrets or {}) uri = config.url or self.build_uri() - return create_engine(uri, hide_parameters=True) + return create_engine( + uri, + hide_parameters=self.hide_parameters, + echo=not self.hide_parameters, + ) class MySQLConnector(SQLConnector): @@ -151,7 +158,11 @@ def create_client(self) -> Engine: """Returns a SQLAlchemy Engine that can be used to interact with a MySQL database""" config = MySQLSchema(**self.configuration.secrets or {}) uri = config.url or self.build_uri() - return create_engine(uri, hide_parameters=True) + return create_engine( + uri, + hide_parameters=self.hide_parameters, + echo=not self.hide_parameters, + ) class RedshiftConnector(SQLConnector): @@ -170,7 +181,11 @@ def create_client(self) -> Engine: """Returns a SQLAlchemy Engine that can be used to interact with an Amazon Redshift cluster""" config = RedshiftSchema(**self.configuration.secrets or {}) uri = config.url or self.build_uri() - return create_engine(uri, 
hide_parameters=True) + return create_engine( + uri, + hide_parameters=self.hide_parameters, + echo=not self.hide_parameters, + ) class SnowflakeConnector(SQLConnector): @@ -206,4 +221,12 @@ def create_client(self) -> Engine: """Returns a SQLAlchemy Engine that can be used to interact with Snowflake""" config = SnowflakeSchema(**self.configuration.secrets or {}) uri: str = config.url or self.build_uri() - return create_engine(uri, hide_parameters=True) + return create_engine( + uri, + hide_parameters=self.hide_parameters, + echo=not self.hide_parameters, + ) + + def query_config(self, node: TraversalNode) -> SQLQueryConfig: + """Query wrapper corresponding to the input traversal_node.""" + return SnowflakeQueryConfig(node) diff --git a/src/fidesops/task/task_resources.py b/src/fidesops/task/task_resources.py index 9e6d6f4ed..dbdeeaab8 100644 --- a/src/fidesops/task/task_resources.py +++ b/src/fidesops/task/task_resources.py @@ -14,9 +14,10 @@ from fidesops.models.privacy_request import PrivacyRequest from fidesops.service.connectors import ( BaseConnector, - PostgreSQLConnector, MongoDBConnector, MySQLConnector, + PostgreSQLConnector, + SnowflakeConnector, ) from fidesops.util.cache import get_cache @@ -47,6 +48,8 @@ def build_connector(connection_config: ConnectionConfig) -> BaseConnector: return MongoDBConnector(connection_config) if connection_config.connection_type == ConnectionType.mysql: return MySQLConnector(connection_config) + if connection_config.connection_type == ConnectionType.snowflake: + return SnowflakeConnector(connection_config) raise NotImplementedError( f"No connector available for {connection_config.connection_type}" ) diff --git a/src/fidesops/util/logger.py b/src/fidesops/util/logger.py index bacaf491e..141a28208 100644 --- a/src/fidesops/util/logger.py +++ b/src/fidesops/util/logger.py @@ -43,6 +43,15 @@ def factory( # pylint: disable=R0913 return factory +def _can_log_pii() -> bool: + """ + Returns whether Fidesops in its current state can 
be permitted + to output any PII to the logs. Right now this is being allowed + in test mode only. + """ + return os.getenv("TESTING") == "True" + + def _mask_pii_for_logs(parameter: Any) -> Any: """ :param parameter: param that contains possible pii @@ -53,4 +62,8 @@ def _mask_pii_for_logs(parameter: Any) -> Any: """ if isinstance(parameter, (NotPii, Number)): return parameter + + if _can_log_pii(): + return parameter + return MASKED diff --git a/src/migrations/env.py b/src/migrations/env.py index 53472f410..c9881a371 100644 --- a/src/migrations/env.py +++ b/src/migrations/env.py @@ -44,7 +44,7 @@ def get_url(): """ database_uri = fides_config.database.SQLALCHEMY_DATABASE_URI - if os.getenv("TESTING"): + if fides_config.is_test_mode: database_uri = fides_config.database.SQLALCHEMY_TEST_DATABASE_URI return database_uri diff --git a/tests/api/v1/endpoints/test_dataset_endpoints.py b/tests/api/v1/endpoints/test_dataset_endpoints.py index 32a9b3b01..a423ab014 100644 --- a/tests/api/v1/endpoints/test_dataset_endpoints.py +++ b/tests/api/v1/endpoints/test_dataset_endpoints.py @@ -34,7 +34,7 @@ def _reject_key(dict: Dict, key: str) -> Dict: def test_example_datasets(example_datasets): """Ensure the test fixture loads the right sample data""" assert example_datasets - assert len(example_datasets) == 2 + assert len(example_datasets) == 3 assert example_datasets[0]["fides_key"] == "postgres_example_test_dataset" assert len(example_datasets[0]["collections"]) == 11 assert example_datasets[1]["fides_key"] == "mongo_test" @@ -425,7 +425,7 @@ def test_patch_datasets_bulk_create( assert response.status_code == 200 response_body = json.loads(response.text) - assert len(response_body["succeeded"]) == 2 + assert len(response_body["succeeded"]) == 3 assert len(response_body["failed"]) == 0 # Confirm that the created dataset matches the values we provided @@ -488,7 +488,7 @@ def test_patch_datasets_bulk_update( assert response.status_code == 200 response_body = 
json.loads(response.text) - assert len(response_body["succeeded"]) == 2 + assert len(response_body["succeeded"]) == 3 assert len(response_body["failed"]) == 0 postgres_dataset = response_body["succeeded"][0] @@ -513,8 +513,20 @@ def test_patch_datasets_bulk_update( assert mongo_config is not None assert mongo_config.updated_at is not None + snowflake_dataset = response_body["succeeded"][2] + assert snowflake_dataset["fides_key"] == "snowflake_example_test_dataset" + assert "birthday" not in [ + f["name"] for f in snowflake_dataset["collections"][0]["fields"] + ] + snowflake_config = DatasetConfig.get_by( + db=db, field="fides_key", value="snowflake_example_test_dataset" + ) + assert snowflake_config is not None + assert snowflake_config.updated_at is not None + postgres_config.delete(db) mongo_config.delete(db) + snowflake_config.delete(db) @mock.patch("fidesops.models.datasetconfig.DatasetConfig.create_or_update") def test_patch_datasets_failed_response( @@ -533,20 +545,14 @@ def test_patch_datasets_failed_response( assert response.status_code == 200 # Returns 200 regardless response_body = json.loads(response.text) assert len(response_body["succeeded"]) == 0 - assert len(response_body["failed"]) == 2 + assert len(response_body["failed"]) == 3 for failed_response in response_body["failed"]: assert "Dataset create/update failed" in failed_response["message"] assert set(failed_response.keys()) == {"message", "data"} - assert ( - response_body["failed"][0]["data"]["fides_key"] - == example_datasets[0]["fides_key"] - ) - assert ( - response_body["failed"][1]["data"]["fides_key"] - == example_datasets[1]["fides_key"] - ) + for index, failed in enumerate(response_body["failed"]): + assert failed["data"]["fides_key"] == example_datasets[index]["fides_key"] class TestGetDatasets: diff --git a/tests/api/v1/endpoints/test_privacy_request_endpoints.py b/tests/api/v1/endpoints/test_privacy_request_endpoints.py index 883223a90..70fdc54a6 100644 --- 
a/tests/api/v1/endpoints/test_privacy_request_endpoints.py +++ b/tests/api/v1/endpoints/test_privacy_request_endpoints.py @@ -1,7 +1,9 @@ import json +import os from datetime import datetime -from typing import List +from typing import List, Dict from unittest import mock + from fastapi_pagination import Params import pytest from starlette.testclient import TestClient @@ -283,7 +285,6 @@ def test_create_privacy_request_no_identities( response_data = resp.json()["failed"] assert len(response_data) == 1 - class TestGetPrivacyRequests: @pytest.fixture(scope="function") def url(self, oauth_client: ClientDetail) -> str: diff --git a/tests/conftest.py b/tests/conftest.py index ebdb5c6e3..2e0fdb1ed 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,7 +8,10 @@ drop_database, ) -from fidesops.core.config import config +from fidesops.core.config import ( + config, + load_toml, +) from fidesops.db.database import init_db from fidesops.db.session import get_db_session, get_db_engine from fidesops.main import app @@ -29,7 +32,7 @@ def migrate_test_db() -> None: """Apply migrations at beginning and end of testing session""" logger.debug("Applying migrations...") - assert os.getenv("TESTING", False) + assert config.is_test_mode init_db(config.database.SQLALCHEMY_TEST_DATABASE_URI) logger.debug("Migrations successfully applied") @@ -38,8 +41,7 @@ def migrate_test_db() -> None: def db() -> Generator: """Return a connection to the test DB""" # Create the test DB enginge - ## This asserts that TESTING==True - assert os.getenv("TESTING", False) + assert config.is_test_mode engine = get_db_engine( database_uri=config.database.SQLALCHEMY_TEST_DATABASE_URI, ) @@ -107,3 +109,8 @@ def _build_jwt(scopes: List[str]): return {"Authorization": "Bearer " + jwe} return _build_jwt + + +@pytest.fixture(scope="session") +def integration_config(): + yield load_toml("fidesops-integration.toml") diff --git a/tests/fixtures.py b/tests/fixtures.py index f12933650..18fb09b4e 100644 --- 
a/tests/fixtures.py +++ b/tests/fixtures.py @@ -1,6 +1,6 @@ import logging -from datetime import datetime, timedelta -from datetime import timezone +from datetime import datetime, timedelta, timezone +import os from typing import Dict, Generator, List from unittest import mock from uuid import uuid4 @@ -36,6 +36,9 @@ ExecutionLogStatus, ) from fidesops.models.storage import StorageConfig, ResponseFormat +from fidesops.schemas.connection_configuration import ( + SnowflakeSchema, +) from fidesops.schemas.storage.storage import ( FileNaming, StorageDetails, @@ -242,7 +245,13 @@ def redshift_connection_config(db: Session) -> Generator: @pytest.fixture(scope="function") -def snowflake_connection_config(db: Session) -> Generator: +def snowflake_connection_config_without_secrets( + db: Session, +) -> Generator: + """ + Returns a Snowflake ConnectionConfig without secrets + attached that is safe to use in any tests. + """ connection_config = ConnectionConfig.create( db=db, data={ @@ -256,6 +265,27 @@ connection_config.delete(db) + +@pytest.fixture(scope="function") +def snowflake_connection_config( + db: Session, + integration_config: Dict[str, str], + snowflake_connection_config_without_secrets: ConnectionConfig, +) -> Generator: + """ + Returns a Snowflake ConnectionConfig with secrets attached if secrets are present + in the configuration. 
+ """ + snowflake_connection_config = snowflake_connection_config_without_secrets + uri = integration_config.get("snowflake", {}).get("external_uri") or os.environ.get( + "SNOWFLAKE_TEST_URI" + ) + if uri is not None: + schema = SnowflakeSchema(url=uri) + snowflake_connection_config.secrets = schema.dict() + snowflake_connection_config.save(db=db) + yield snowflake_connection_config + + @pytest.fixture(scope="function") def https_connection_config(db: Session) -> Generator: name = str(uuid4()) @@ -847,20 +877,45 @@ def dataset_config_preview( dataset_config.delete(db) +def load_dataset(filename: str) -> Dict: + yaml_file = load_file(filename) + with open(yaml_file, "r") as file: + return yaml.safe_load(file).get("dataset", []) + + @pytest.fixture def example_datasets() -> List[Dict]: example_datasets = [] example_filenames = [ "data/dataset/postgres_example_test_dataset.yml", "data/dataset/mongo_example_test_dataset.yml", + "data/dataset/snowflake_example_test_dataset.yml", ] for filename in example_filenames: - yaml_file = load_file(filename) - with open(yaml_file, "r") as file: - example_datasets += yaml.safe_load(file).get("dataset", []) + example_datasets += load_dataset(filename) return example_datasets +@pytest.fixture +def snowflake_example_test_dataset_config( + snowflake_connection_config: ConnectionConfig, + db: Session, + example_datasets: List[Dict], +) -> Generator: + dataset = example_datasets[2] + fides_key = dataset["fides_key"] + dataset_config = DatasetConfig.create( + db=db, + data={ + "connection_config_id": snowflake_connection_config.id, + "fides_key": fides_key, + "dataset": dataset, + }, + ) + yield dataset_config + dataset_config.delete(db=db) + + @pytest.fixture def postgres_example_test_dataset_config( connection_config: ConnectionConfig, diff --git a/tests/service/connectors/test_queryconfig.py b/tests/service/connectors/test_queryconfig.py index 1793e07f6..a2c69e1dc 100644 --- a/tests/service/connectors/test_queryconfig.py +++ 
b/tests/service/connectors/test_queryconfig.py @@ -173,9 +173,7 @@ def test_generate_update_stmt_one_field( } text_clause = config.generate_update_stmt(row, erasure_policy) - assert ( - text_clause.text == """UPDATE customer SET name = :name WHERE id = :id""" - ) + assert text_clause.text == """UPDATE customer SET name = :name WHERE id = :id""" assert text_clause._bindparams["name"].key == "name" assert text_clause._bindparams["name"].value is None # Null masking strategy @@ -205,9 +203,7 @@ def test_generate_update_stmt_length_truncation( text_clause = config.generate_update_stmt( row, erasure_policy_string_rewrite_long ) - assert ( - text_clause.text == """UPDATE customer SET name = :name WHERE id = :id""" - ) + assert text_clause.text == """UPDATE customer SET name = :name WHERE id = :id""" assert text_clause._bindparams["name"].key == "name" # length truncation on name field assert ( @@ -273,7 +269,7 @@ def test_generate_update_stmts_from_multiple_rules( assert ( text_clause.text - == "UPDATE customer SET name = :name,email = :email WHERE id = :id" + == "UPDATE customer SET name = :name,email = :email WHERE id = :id" ) # Two different masking strategies used for name and email assert text_clause._bindparams["name"].value is None # Null masking strategy diff --git a/tests/service/privacy_request/request_runner_service_test.py b/tests/service/privacy_request/request_runner_service_test.py index fea9527bc..fce534968 100644 --- a/tests/service/privacy_request/request_runner_service_test.py +++ b/tests/service/privacy_request/request_runner_service_test.py @@ -2,6 +2,7 @@ from typing import Any, Dict from unittest import mock from unittest.mock import Mock +from uuid import uuid4 import pydash import pytest @@ -16,6 +17,7 @@ from fidesops.models.policy import DataCategory from fidesops.models.privacy_request import PrivacyRequest, ExecutionLog from fidesops.service.connectors import PostgreSQLConnector +from fidesops.service.connectors.sql_connector import 
SnowflakeConnector from fidesops.service.privacy_request.request_runner_service import PrivacyRequestRunner from fidesops.util.async_util import wait_for @@ -129,7 +131,7 @@ def test_create_and_process_access_request( policy.delete(db=db) pr.delete(db=db) - db.expunge_all() + assert not pr in db # Check that `pr` has been expunged from the session assert ExecutionLog.get(db, id=log_id).privacy_request_id == pr_id @@ -317,3 +319,93 @@ def test_create_and_process_erasure_request_read_access( # "read" access assert row[1] is not None assert customer_found + + +@pytest.fixture(scope="function") +def snowflake_resources( + snowflake_example_test_dataset_config, +): + snowflake_connection_config = ( + snowflake_example_test_dataset_config.connection_config + ) + snowflake_client = SnowflakeConnector(snowflake_connection_config).client() + uuid = str(uuid4()) + customer_email = f"customer-{uuid}@example.com" + formatted_customer_email = f"'{customer_email}'" + customer_name = f"{uuid}" + formatted_customer_name = f"'{customer_name}'" + + stmt = 'select max("id") from "customer";' + res = snowflake_client.execute(stmt).all() + customer_id = res[0][0] + 1 + + stmt = f""" + insert into "customer" ("id", "email", "name", "variant_eg") + select {customer_id}, {formatted_customer_email}, {formatted_customer_name}, to_variant({formatted_customer_name}); + """ + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + yield { + "email": customer_email, + "formatted_email": formatted_customer_email, + "name": customer_name, + "id": customer_id, + "client": snowflake_client, + } + # Remove test data and close Snowflake connection in teardown + stmt = f'delete from "customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + assert res[0][0] == 1 + + +@pytest.mark.integration_external +def test_create_and_process_access_request_snowflake( + snowflake_resources, + db, + cache, + policy, +): + customer_email = 
snowflake_resources["email"] + customer_name = snowflake_resources["name"] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": policy.key, + "identities": [{"email": customer_email}], + } + pr = get_privacy_request_results(db, policy, cache, data) + results = pr.get_results() + customer_table_key = ( + f"EN_{pr.id}__access_request__snowflake_example_test_dataset:customer" + ) + assert len(results[customer_table_key]) == 1 + assert results[customer_table_key][0]["email"] == customer_email + assert results[customer_table_key][0]["name"] == customer_name + + pr.delete(db=db) + + +@pytest.mark.integration_external +def test_create_and_process_erasure_request_snowflake( + snowflake_example_test_dataset_config, + snowflake_resources, + integration_config: Dict[str, str], + db, + cache, + erasure_policy, +): + customer_email = snowflake_resources["email"] + snowflake_client = snowflake_resources["client"] + formatted_customer_email = snowflake_resources["formatted_email"] + data = { + "requested_at": "2021-08-30T16:09:37.359Z", + "policy_key": erasure_policy.key, + "identities": [{"email": customer_email}], + } + pr = get_privacy_request_results(db, erasure_policy, cache, data) + pr.delete(db=db) + + stmt = f'select "name", "variant_eg" from "customer" where "email" = {formatted_customer_email};' + res = snowflake_client.execute(stmt).all() + for row in res: + assert row[0] == None + assert row[1] == None diff --git a/tests/util/test_logger.py b/tests/util/test_logger.py index 1b59c1d74..b520d37d7 100644 --- a/tests/util/test_logger.py +++ b/tests/util/test_logger.py @@ -1,8 +1,19 @@ +import os +import pytest + from fidesops.util import logger from fidesops.util.logger import NotPii, MASKED -def test_logger_masks_pii() -> None: +@pytest.fixture(scope="function") +def toggle_testing_envvar() -> None: + original_value = os.getenv("TESTING") + del os.environ["TESTING"] + yield + os.environ["TESTING"] = original_value + + +def 
test_logger_masks_pii(toggle_testing_envvar) -> None: some_data = "some_data" result = logger._mask_pii_for_logs(some_data) assert result == MASKED