Skip to content
This repository has been archived by the owner on Nov 30, 2022. It is now read-only.

Commit

Permalink
closes #36 - masks params provided in % str formatter in logs, unless …
Browse files Browse the repository at this point in the history
…set otherwise
  • Loading branch information
catherinesmith committed Nov 15, 2021
1 parent dc72d96 commit afebf29
Show file tree
Hide file tree
Showing 17 changed files with 103 additions and 53 deletions.
1 change: 0 additions & 1 deletion docs/fidesops/docs/tutorial/outline.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import yaml
from datetime import datetime

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

# For tutorial simplicity. In prod, this should go in an ENV file or similar.
FIDESOPS_URL = "http://localhost:8000"
Expand Down
7 changes: 4 additions & 3 deletions src/fidesops/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from pydantic.env_settings import SettingsSourceCallable

from fidesops.common_exceptions import MissingConfig
from fidesops.util.logger import NotPii

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -201,9 +202,9 @@ def load_file(file_name: str) -> str:
for dir_str in directories:
possible_location = os.path.join(dir_str, file_name)
if possible_location and os.path.isfile(possible_location):
logger.info("Loading file %s from %s", file_name, dir_str)
logger.info("Loading file %s from %s", NotPii(file_name), NotPii(dir_str))
return possible_location
logger.debug("%s not found at %s", file_name, dir_str)
logger.debug("%s not found at %s", NotPii(file_name), NotPii(dir_str))
raise FileNotFoundError


Expand All @@ -229,7 +230,7 @@ def get_config() -> FidesopsConfig:
try:
return FidesopsConfig.parse_obj(load_toml("fidesops.toml"))
except (FileNotFoundError, ValidationError) as e:
logger.warning("fidesops.toml could not be loaded: %s", e)
logger.warning("fidesops.toml could not be loaded: %s", NotPii(e))
# If no path is specified Pydantic will attempt to read settings from
# the environment. Default values will still be used if the matching
# environment variable is not set.
Expand Down
1 change: 0 additions & 1 deletion src/fidesops/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
Field,
)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down
4 changes: 2 additions & 2 deletions src/fidesops/graph/traversal.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@
ROOT_COLLECTION_ADDRESS,
)
from fidesops.graph.graph import Node, Edge, DatasetGraph
from fidesops.util.logger import NotPii
from fidesops.util.queue import Queue
from fidesops.util.collection_util import append

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -174,7 +174,7 @@ def __verify_traversal(self) -> None:
and raises an error on any traversal failure conditions."""
self.traverse(
{self.root_node.address: [self.seed_data]},
lambda n, m: logger.info("Traverse %s", n.address),
lambda n, m: logger.info("Traverse %s", NotPii(n.address)),
)

def traversal_map(
Expand Down
2 changes: 2 additions & 0 deletions src/fidesops/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,10 @@
from fidesops.db.database import init_db
from fidesops.core.config import config
from fidesops.tasks.scheduled.tasks import initiate_scheduled_request_intake
from fidesops.util.logger import get_fides_log_record_factory

logging.basicConfig(level=logging.INFO)
logging.setLogRecordFactory(get_fides_log_record_factory())
logger = logging.getLogger(__name__)

app = FastAPI(title="fidesops", openapi_url=f"{V1_URL_PREFIX}/openapi.json")
Expand Down
11 changes: 6 additions & 5 deletions src/fidesops/models/datasetconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
)
from fidesops.models.connectionconfig import ConnectionConfig
from fidesops.schemas.dataset import FidesopsDataset, FidesopsDatasetField
from fidesops.util.logger import NotPii

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -144,9 +145,9 @@ def convert_dataset_to_graph(dataset: FidesopsDataset, connection_key: str) -> D
]
logger.debug(
"Parsing dataset %s: parsed collection %s with %s fields",
dataset_name,
collection.name,
len(graph_fields),
NotPii(dataset_name),
NotPii(collection.name),
NotPii(len(graph_fields)),
)
collection_after: Set[CollectionAddress] = set()
if collection.fidesops_meta and collection.fidesops_meta.after:
Expand All @@ -160,8 +161,8 @@ def convert_dataset_to_graph(dataset: FidesopsDataset, connection_key: str) -> D
graph_collections.append(graph_collection)
logger.debug(
"Finished parsing dataset %s with %s collections",
dataset_name,
len(graph_collections),
NotPii(dataset_name),
NotPii(len(graph_collections)),
)

return Dataset(
Expand Down
3 changes: 0 additions & 3 deletions src/fidesops/models/privacy_request.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# pylint: disable=R0401
import logging
from typing import Any, Dict

from enum import Enum as EnumType
Expand All @@ -26,8 +25,6 @@
FidesopsRedis,
)

logging.basicConfig(level=logging.INFO)


class PrivacyRequestStatus(EnumType):
"""Enum for privacy request statuses, reflecting where they are in the Privacy Request Lifecycle"""
Expand Down
9 changes: 1 addition & 8 deletions src/fidesops/service/connectors/base_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from fidesops.models.policy import Policy
from fidesops.service.connectors.query_config import QueryConfig

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -52,13 +51,7 @@ def retrieve_data(
each input key that may be queried on."""

@abstractmethod
def mask_data(
self,
node: TraversalNode,
policy: Policy,
rows: List[Row],
log_queries_with_data: bool = False,
) -> int:
def mask_data(self, node: TraversalNode, policy: Policy, rows: List[Row]) -> int:
"""Execute a masking request. Return the number of rows that have been updated"""

def dry_run_query(self, node: TraversalNode) -> str:
Expand Down
21 changes: 8 additions & 13 deletions src/fidesops/service/connectors/mongodb_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
BaseConnector,
)
from fidesops.service.connectors.query_config import QueryConfig, MongoQueryConfig
from fidesops.util.logger import NotPii

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -100,13 +100,7 @@ def retrieve_data(
logger.info(f"Found {len(rows)} on {node.address}")
return rows

def mask_data(
self,
node: TraversalNode,
policy: Policy,
rows: List[Row],
log_queries_with_data: bool = True,
) -> int:
def mask_data(self, node: TraversalNode, policy: Policy, rows: List[Row]) -> int:
# pylint: disable=too-many-locals
"""Execute a masking request"""
query_config = self.query_config(node)
Expand All @@ -121,10 +115,11 @@ def mask_data(
collection = db[collection_name]
update_result = collection.update_one(query, update, upsert=False)
update_ct += update_result.modified_count

if log_queries_with_data:
logger.info(
f"db.{collection_name}.update_one({query}, {update}, upsert=False)"
)
logger.info(
"db.%s.update_one(%s, %s, upsert=False)",
NotPii(collection_name),
query,
update,
)

return update_ct
1 change: 0 additions & 1 deletion src/fidesops/service/connectors/query_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from fidesops.service.masking.strategy.masking_strategy_factory import get_strategy
from fidesops.util.collection_util import append

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
T = TypeVar("T")

Expand Down
9 changes: 1 addition & 8 deletions src/fidesops/service/connectors/sql_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
)
from fidesops.service.connectors.query_config import SQLQueryConfig

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -82,13 +81,7 @@ def retrieve_data(
results = connection.execute(stmt)
return SQLConnector.cursor_result_to_rows(results)

def mask_data(
self,
node: TraversalNode,
policy: Policy,
rows: List[Row],
log_queries_with_data: bool = True,
) -> int:
def mask_data(self, node: TraversalNode, policy: Policy, rows: List[Row]) -> int:
"""Execute a masking request. Returns the number of records masked"""
query_config = self.query_config(node)
update_ct = 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
)
from fidesops.util.cache import FidesopsRedis

logging.basicConfig(level=logging.INFO)


class PrivacyRequestRunner:
"""The class responsible for dispatching PrivacyRequests into the execution layer"""
Expand Down
8 changes: 4 additions & 4 deletions src/fidesops/task/graph_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from fidesops.service.connectors import BaseConnector
from fidesops.task.task_resources import TaskResources
from fidesops.util.collection_util import partition, append
from fidesops.util.logger import NotPii

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

dask.config.set(scheduler="processes")
Expand Down Expand Up @@ -142,9 +142,9 @@ def to_dask_input_data(self, *data: List[Row]) -> Dict[str, List[Any]]:
if not len(data) == len(self.input_keys):
logger.warning(
"%s expected %s input keys, received %s",
self,
len(self.input_keys),
len(data),
NotPii(self),
NotPii(len(self.input_keys)),
NotPii(len(data)),
)

output: Dict[str, List[Any]] = {}
Expand Down
1 change: 0 additions & 1 deletion src/fidesops/task/task_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
)
from fidesops.util.cache import get_cache

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


Expand Down
51 changes: 51 additions & 0 deletions src/fidesops/util/logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import logging
import os
from typing import Any, Mapping, Union

# Placeholder value substituted for any log argument treated as potential PII.
MASKED = "MASKED"


class NotPii(str):
    """Marker string type: wrap a log argument in NotPii to exempt it from PII masking."""


def get_fides_log_record_factory() -> Any:
    """Return a LogRecord factory that masks potential PII in log arguments.

    Install via ``logging.setLogRecordFactory()``. Every positional log
    argument is passed through ``_mask_pii_for_logs`` (replaced with MASKED
    unless wrapped in NotPii or the LOG_PII env var is "True").

    :return: a callable with the ``logging.LogRecord`` constructor signature
    """

    def factory(  # pylint: disable=R0913
        name: str,
        level: int,
        fn: str,
        lno: int,
        msg: str,
        args: Union[tuple[Any, ...], Mapping[str, Any]],
        exc_info: Any,
        func: Union[str, None] = None,
        sinfo: Union[str, None] = None,
    ) -> logging.LogRecord:
        # ``args`` may be None (no formatting args), a mapping, or — per the
        # convention LogRecord.__init__ itself unwraps — a 1-tuple holding a
        # non-empty mapping for "%(key)s"-style formatting. Masking must
        # preserve those shapes or formatting breaks at emit time.
        masked_args: Any
        if args is None:
            masked_args = None
        elif isinstance(args, Mapping):
            masked_args = {key: _mask_pii_for_logs(val) for key, val in args.items()}
        elif len(args) == 1 and isinstance(args[0], Mapping) and args[0]:
            # mask the mapping's values, keep keys so %(key)s lookups still work
            masked_args = (
                {key: _mask_pii_for_logs(val) for key, val in args[0].items()},
            )
        else:
            masked_args = tuple(_mask_pii_for_logs(arg) for arg in args)
        return logging.LogRecord(
            name=name,
            level=level,
            pathname=fn,
            lineno=lno,
            msg=msg,
            args=masked_args,
            exc_info=exc_info,
            func=func,
            sinfo=sinfo,
        )

    return factory


def _mask_pii_for_logs(pii_text: Any) -> Any:
    """Mask a single log argument unless it is explicitly allowed through.

    :param pii_text: a log parameter that may contain PII
    :return: ``pii_text`` unchanged when the LOG_PII env var is "True" or the
        value is a NotPii instance; otherwise the MASKED placeholder
    """
    allowed = os.getenv("LOG_PII") == "True" or isinstance(pii_text, NotPii)
    return pii_text if allowed else MASKED
1 change: 0 additions & 1 deletion tests/integration_tests/test_sql_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
from ..task.traversal_data import integration_db_graph, integration_db_dataset

dask.config.set(scheduler="processes")
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
sample_postgres_configuration_policy = erasure_policy(
"system.operations",
Expand Down
24 changes: 24 additions & 0 deletions tests/util/test_logger.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import os

from fidesops.util import logger
from fidesops.util.logger import NotPii, MASKED


def test_logger_masks_pii() -> None:
    """By default (LOG_PII unset) arbitrary values are masked."""
    # Pop LOG_PII so an ambient LOG_PII=True can't flip the expected result,
    # and restore it afterwards so other tests see the original environment.
    previous = os.environ.pop("LOG_PII", None)
    try:
        result = logger._mask_pii_for_logs("some_data")
        assert result == MASKED
    finally:
        if previous is not None:
            os.environ["LOG_PII"] = previous


def test_logger_does_not_mask_pii_env() -> None:
    """When LOG_PII=True, values pass through unmasked."""
    previous = os.environ.get("LOG_PII")
    os.environ["LOG_PII"] = "True"
    try:
        some_data = "some_data"
        result = logger._mask_pii_for_logs(some_data)
        assert result == some_data
    finally:
        # Restore the original value (or remove the key) even if the
        # assertion fails, instead of blindly setting "False" and leaking
        # environment state into other tests.
        if previous is None:
            del os.environ["LOG_PII"]
        else:
            os.environ["LOG_PII"] = previous


def test_logger_does_not_mask_whitelist() -> None:
    """Values wrapped in NotPii are passed through untouched."""
    whitelisted = NotPii("some_data")
    assert logger._mask_pii_for_logs(whitelisted) == whitelisted

0 comments on commit afebf29

Please sign in to comment.