diff --git a/packages/service-library/src/servicelib/fastapi/httpx_utils.py b/packages/service-library/src/servicelib/fastapi/httpx_utils.py index b8fcc8fd291..6018a9d01cd 100644 --- a/packages/service-library/src/servicelib/fastapi/httpx_utils.py +++ b/packages/service-library/src/servicelib/fastapi/httpx_utils.py @@ -1,12 +1,10 @@ import httpx - -def _is_secret(k: str) -> bool: - return "secret" in k.lower() or "pass" in k.lower() +from ..utils_secrets import mask_sensitive_data def _get_headers_safely(request: httpx.Request) -> dict[str, str]: - return {k: "*" * 5 if _is_secret(k) else v for k, v in request.headers.items()} + return mask_sensitive_data(dict(request.headers)) def to_httpx_command( diff --git a/packages/service-library/src/servicelib/logging_utils.py b/packages/service-library/src/servicelib/logging_utils.py index cd3dc0f289e..f7f8944e58a 100644 --- a/packages/service-library/src/servicelib/logging_utils.py +++ b/packages/service-library/src/servicelib/logging_utils.py @@ -7,16 +7,18 @@ import asyncio import functools import logging -import os import sys from asyncio import iscoroutinefunction from collections.abc import Callable from contextlib import contextmanager from datetime import datetime from inspect import getframeinfo, stack +from pathlib import Path from typing import Any, TypeAlias, TypedDict -log = logging.getLogger(__name__) +from .utils_secrets import mask_sensitive_data + +_logger = logging.getLogger(__name__) BLACK = "\033[0;30m" @@ -135,8 +137,13 @@ def test_logger_propagation(logger: logging.Logger): def _log_arguments( logger_obj: logging.Logger, level: int, func: Callable, *args, **kwargs ) -> dict[str, str]: + # NOTE: We should avoid logging arguments but in the meantime, we are trying to + # avoid exposing sensitive data in the logs. For `args` is more difficult. We could eventually + # deduced sensitivity based on the entropy of values but it is very costly + # SEE https://github.com/ITISFoundation/osparc-simcore/security/code-scanning/18 args_passed_in_function = [repr(a) for a in args] - kwargs_passed_in_function = [f"{k}={v!r}" for k, v in kwargs.items()] + masked_kwargs = mask_sensitive_data(kwargs) + kwargs_passed_in_function = [f"{k}={v!r}" for k, v in masked_kwargs.items()] # The lists of positional and keyword arguments is joined together to form final string formatted_arguments = ", ".join(args_passed_in_function + kwargs_passed_in_function) @@ -148,7 +155,7 @@ def _log_arguments( py_file_caller = getframeinfo(stack()[1][0]) extra_args = { "func_name_override": func.__name__, - "file_name_override": os.path.basename(py_file_caller.filename), + "file_name_override": Path(py_file_caller.filename).name, } # Before to the function execution, log function details. @@ -164,7 +171,7 @@ def _log_arguments( def log_decorator(logger=None, level: int = logging.DEBUG, log_traceback: bool = False): # Build logger object - logger_obj = logger or log + logger_obj = logger or _logger def log_decorator_info(func): if iscoroutinefunction(func): diff --git a/packages/service-library/src/servicelib/utils_secrets.py b/packages/service-library/src/servicelib/utils_secrets.py index f1cf5a9f7c2..7b74a491080 100644 --- a/packages/service-library/src/servicelib/utils_secrets.py +++ b/packages/service-library/src/servicelib/utils_secrets.py @@ -57,3 +57,39 @@ def secure_randint(start: StrictInt, end: StrictInt) -> int: diff = end - start return secrets.randbelow(diff) + start + + +_PLACEHOLDER: Final[str] = "*" * 8 +_DEFAULT_SENSITIVE_KEYWORDS: Final[set[str]] = {"pass", "secret"} + + +def _is_possibly_sensitive(name: str, sensitive_keywords: set[str]) -> bool: + return any(k.lower() in name.lower() for k in sensitive_keywords) + + +def mask_sensitive_data( + data: dict, *, extra_sensitive_keywords: set[str] | None = None +) -> dict: + """Replaces the sensitive values in the dict with a placeholder before logging + + Sensitive values are detected testing the key name (i.e. a str(key) ) againts sensitive keywords `pass` or `secret`. + + NOTE: this function is used to avoid logging sensitive information like passwords or secrets + """ + sensitive_keywords = _DEFAULT_SENSITIVE_KEYWORDS | ( + extra_sensitive_keywords or set() + ) + masked_data = {} + for key, value in data.items(): + if isinstance(value, dict): + masked_data[key] = mask_sensitive_data( + value, extra_sensitive_keywords=sensitive_keywords + ) + else: + masked_data[key] = ( + _PLACEHOLDER + if _is_possibly_sensitive(f"{key}", sensitive_keywords) + else value + ) + + return masked_data diff --git a/packages/service-library/tests/fastapi/test_httpx_utils.py b/packages/service-library/tests/fastapi/test_httpx_utils.py index a8d3de6f2e2..3d37a77988e 100644 --- a/packages/service-library/tests/fastapi/test_httpx_utils.py +++ b/packages/service-library/tests/fastapi/test_httpx_utils.py @@ -14,6 +14,7 @@ from fastapi import status from httpx import AsyncClient from servicelib.fastapi.httpx_utils import to_curl_command, to_httpx_command +from servicelib.utils_secrets import _PLACEHOLDER @pytest.fixture @@ -61,7 +62,7 @@ async def test_to_curl_command(client: AsyncClient): assert ( cmd_short - == 'curl -X POST -H "host: test_base_http_api" -H "accept: */*" -H "accept-encoding: gzip, deflate" -H "connection: keep-alive" -H "user-agent: python-httpx/0.25.0" -H "x-secret: *****" -H "content-length: 9" -H "content-type: application/json" -d \'{"y": 12}\' https://test_base_http_api/foo?x=3' + == f'curl -X POST -H "host: test_base_http_api" -H "accept: */*" -H "accept-encoding: gzip, deflate" -H "connection: keep-alive" -H "user-agent: python-httpx/0.25.0" -H "x-secret: {_PLACEHOLDER}" -H "content-length: 9" -H "content-type: application/json" -d \'{{"y": 12}}\' https://test_base_http_api/foo?x=3' ) cmd_long = to_curl_command(response.request, use_short_options=False) @@ -114,5 +115,5 @@ async def test_to_httpx_command(client: AsyncClient): print(cmd_short) assert ( cmd_short - == 'httpx -m POST -c \'{"y": 12}\' -h "host" "test_base_http_api" -h "accept" "*/*" -h "accept-encoding" "gzip, deflate" -h "connection" "keep-alive" -h "user-agent" "python-httpx/0.25.0" -h "x-secret" "*****" -h "content-length" "9" -h "content-type" "application/json" https://test_base_http_api/foo?x=3' + == f'httpx -m POST -c \'{{"y": 12}}\' -h "host" "test_base_http_api" -h "accept" "*/*" -h "accept-encoding" "gzip, deflate" -h "connection" "keep-alive" -h "user-agent" "python-httpx/0.25.0" -h "x-secret" "{_PLACEHOLDER}" -h "content-length" "9" -h "content-type" "application/json" https://test_base_http_api/foo?x=3' ) diff --git a/packages/service-library/tests/test_utils_secrets.py b/packages/service-library/tests/test_utils_secrets.py index ac36f98d405..37e31f4c076 100644 --- a/packages/service-library/tests/test_utils_secrets.py +++ b/packages/service-library/tests/test_utils_secrets.py @@ -5,16 +5,20 @@ # pylint: disable=unused-variable +from uuid import uuid4 + import pytest from pydantic import ValidationError from servicelib.utils_secrets import ( _MIN_SECRET_NUM_BYTES, + _PLACEHOLDER, MIN_PASSCODE_LENGTH, MIN_PASSWORD_LENGTH, are_secrets_equal, generate_passcode, generate_password, generate_token_secret_key, + mask_sensitive_data, secure_randint, ) @@ -80,3 +84,36 @@ async def test_secure_randint(start: int, end: int): async def test_secure_randint_called_with_wrong_tupes(): with pytest.raises(ValidationError): secure_randint(1.1, 2) + + +def test_mask_sensitive_data(): + + # NOTE: any hasahble object can be a dict key + uuid_obj = uuid4() + other_obj = object() + + sensitive_data = { + "username": "john_doe", + "password": "sensitive_password", + "details": { + "secret_key": "super_secret_key", + "nested": {"nested_password": "nested_sensitive_password"}, + }, + "credit-card": "12345", + uuid_obj: other_obj, + } + + masked_data = mask_sensitive_data( + sensitive_data, extra_sensitive_keywords={"credit-card"} + ) + + assert masked_data == { + "username": "john_doe", + "password": _PLACEHOLDER, + "details": { + "secret_key": _PLACEHOLDER, + "nested": {"nested_password": _PLACEHOLDER}, + }, + "credit-card": _PLACEHOLDER, + uuid_obj: other_obj, + } diff --git a/services/director-v2/src/simcore_service_director_v2/modules/osparc_variables_substitutions.py b/services/director-v2/src/simcore_service_director_v2/modules/osparc_variables_substitutions.py index 8aa4401d712..8c70c68694e 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/osparc_variables_substitutions.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/osparc_variables_substitutions.py @@ -50,14 +50,17 @@ async def substitute_vendor_secrets_in_model( # if it raises an error vars need replacement _logger.debug("model in which to replace model=%s", model) raise_if_unresolved_osparc_variable_identifier_found(model) - except UnresolvedOsparcVariableIdentifierError: + except UnresolvedOsparcVariableIdentifierError as err: repo = get_repository(app, ServicesEnvironmentsRepository) vendor_secrets = await repo.get_vendor_secrets( service_key=service_key, service_version=service_version, product_name=product_name, ) - _logger.warning("replacing with the vendor_secrets=%s", vendor_secrets) + _logger.warning( + "Failed to resolve osparc variable identifiers in model (%s). Replacing vendor secrets", + err, + ) result = replace_osparc_variable_identifier(model, vendor_secrets) if not safe: @@ -83,7 +86,6 @@ async def resolve_and_substitute_session_variables_in_model( ): # checks before to avoid unnecessary calls to pg # if it raises an error vars need replacement - _logger.debug("model in which to replace model=%s", model) raise_if_unresolved_osparc_variable_identifier_found(model) except UnresolvedOsparcVariableIdentifierError: table: OsparcVariablesTable = app.state.session_variables_table @@ -130,10 +132,6 @@ async def substitute_vendor_secrets_in_specs( service_version=service_version, product_name=product_name, ) - _logger.debug( - "substitute_vendor_secrets_in_specs stored_vendor_secrets=%s", - vendor_secrets, - ) # resolve substitutions resolver.set_substitutions(mappings=vendor_secrets)