From 0a6b0339ea10a2fd9dd9d731c58fdfc818c7f273 Mon Sep 17 00:00:00 2001 From: Miles Mason Winther <42948872+mmwinther@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:03:03 +0100 Subject: [PATCH] Unify the logging config so that JSON logging works --- gunicorn.conf.py | 35 +------ pyproject.toml | 3 + src/datadoc/app.py | 4 +- src/datadoc/config.py | 16 +--- .../gunicorn_access_log_filter.py | 11 +++ .../logging_configuration/logging_config.py | 96 +++++++++++-------- tests/conftest.py | 53 +++++----- .../callbacks/test_dataset_callbacks.py | 6 +- 8 files changed, 103 insertions(+), 121 deletions(-) create mode 100644 src/datadoc/logging_configuration/gunicorn_access_log_filter.py diff --git a/gunicorn.conf.py b/gunicorn.conf.py index dbf68b39..a9ed4fe2 100644 --- a/gunicorn.conf.py +++ b/gunicorn.conf.py @@ -1,40 +1,9 @@ """Configuration for the Gunicorn server.""" -import sys +from datadoc.logging_configuration.logging_config import get_log_config bind = "0.0.0.0:8050" workers = 1 loglevel = "info" preload = True - -logconfig_dict = GUNICORN_LOG_CONFIG = { - "handlers": { - "console_stdout": { - "level": "DEBUG", - "class": "logging.StreamHandler", - "stream": sys.stdout, - }, - }, - "loggers": { - "": {"handlers": ["console_stdout"], "level": "INFO", "propagate": False}, - "gunicorn": { - "handlers": ["console_stdout"], - "level": "INFO", - "propagate": False, - }, - "gunicorn.access": { - "handlers": ["console_stdout"], - "level": "INFO", - "propagate": False, - }, - "gunicorn.error": { - "handlers": ["console_stdout"], - "level": "INFO", - "propagate": False, - }, - }, - "root": { - "level": "INFO", - "handlers": ["console_stdout"], - }, -} +logconfig_dict = get_log_config() diff --git a/pyproject.toml b/pyproject.toml index 58c58f8b..ffc27fe2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -161,6 +161,9 @@ max-complexity = 15 [tool.ruff.lint.pydocstyle] convention = "google" # You can also use "numpy". +[tool.ruff.lint.flake8-pytest-style] +fixture-parentheses = false + [tool.ruff.lint.pep8-naming] classmethod-decorators = [ "classmethod", diff --git a/src/datadoc/app.py b/src/datadoc/app.py index 981e9f0f..2b0030be 100644 --- a/src/datadoc/app.py +++ b/src/datadoc/app.py @@ -24,12 +24,12 @@ from datadoc.frontend.components.control_bars import build_controls_bar from datadoc.frontend.components.control_bars import build_footer_control_bar from datadoc.frontend.components.control_bars import header -from datadoc.logging_configuration.logging_config import configure_logging +from datadoc.logging_configuration.logging_config import get_log_config from datadoc.utils import get_app_version from datadoc.utils import pick_random_port from datadoc.utils import running_in_notebook -configure_logging() +logging.config.dictConfig(get_log_config()) logger = logging.getLogger(__name__) diff --git a/src/datadoc/config.py b/src/datadoc/config.py index 1039d250..015e6af3 100644 --- a/src/datadoc/config.py +++ b/src/datadoc/config.py @@ -2,24 +2,16 @@ from __future__ import annotations -import logging import os -import sys from pathlib import Path -from pprint import pformat from typing import Literal from dapla_metadata.datasets import enums -from dotenv import dotenv_values from dotenv import load_dotenv from datadoc.constants import DAPLA_MANUAL_TEXT from datadoc.frontend.components.builders import build_link_object -logging.basicConfig(level=logging.DEBUG, force=True, stream=sys.stdout) - -logger = logging.getLogger(__name__) - DOT_ENV_FILE_PATH = Path(__file__).parent.joinpath(".env") JUPYTERHUB_USER = "JUPYTERHUB_USER" @@ -34,18 +26,12 @@ def _load_dotenv_file() -> None: if not env_loaded and DOT_ENV_FILE_PATH.exists(): load_dotenv(DOT_ENV_FILE_PATH) env_loaded = True - logger.info( - "Loaded .env file with config keys: \n%s", - pformat(list(dotenv_values(DOT_ENV_FILE_PATH).keys())), - ) def _get_config_item(item: str) -> str | None: """Get a config item. Makes sure all access is logged.""" _load_dotenv_file() - value = os.getenv(item) - logger.debug("Config accessed. %s", item) - return value + return os.getenv(item) def get_jupyterhub_user() -> str | None: diff --git a/src/datadoc/logging_configuration/gunicorn_access_log_filter.py b/src/datadoc/logging_configuration/gunicorn_access_log_filter.py new file mode 100644 index 00000000..36552fd4 --- /dev/null +++ b/src/datadoc/logging_configuration/gunicorn_access_log_filter.py @@ -0,0 +1,11 @@ +import logging + +EXCLUDED_PATHS = ["/healthz", "/_dash-", "/assets"] + + +class GunicornAccessLoggerHealthProbeFilter(logging.Filter): + """Filter out any Gunicorn access logs on Liveness or Readiness probes.""" + + def filter(self, record: logging.LogRecord) -> bool: + """Filter health probes on the /healthz endpoints.""" + return all(path not in record.getMessage() for path in EXCLUDED_PATHS) diff --git a/src/datadoc/logging_configuration/logging_config.py b/src/datadoc/logging_configuration/logging_config.py index 19919cb9..2edcfeb1 100644 --- a/src/datadoc/logging_configuration/logging_config.py +++ b/src/datadoc/logging_configuration/logging_config.py @@ -1,55 +1,69 @@ from __future__ import annotations -import logging.config from typing import Any from datadoc.config import get_log_formatter from datadoc.config import get_log_level +from datadoc.logging_configuration.gunicorn_access_log_filter import ( + GunicornAccessLoggerHealthProbeFilter, +) -def configure_logging(config: dict[str, Any] | None = None) -> None: +def get_log_config() -> dict[str, Any]: """Configure logging for the application.""" - if not config: - config = { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "simple": { - "format": "%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s", + return { + "version": 1, + "disable_existing_loggers": False, + "formatters": { + "simple": { + "format": "%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s", + "logger": "name", + "datefmt": "%Y-%m-%d %H:%M:%S", + }, + "json": { + "()": "datadoc.logging_configuration.json_formatter.DatadocJSONFormatter", + "fmt_keys": { + "level": "levelname", + "message": "message", + "timestamp": "timestamp", "logger": "name", - "datefmt": "%Y-%m-%d %H:%M:%S", - }, - "json": { - "()": "datadoc.logging_configuration.json_formatter.DatadocJSONFormatter", - "fmt_keys": { - "level": "levelname", - "message": "message", - "timestamp": "timestamp", - "logger": "name", - "module": "module", - "function": "funcName", - "line": "lineno", - "thread_name": "threadName", - }, + "module": "module", + "function": "funcName", + "line": "lineno", + "thread_name": "threadName", }, }, - "handlers": { - "stdout": { - "class": "logging.StreamHandler", - "level": get_log_level(), - "formatter": get_log_formatter(), - "stream": "ext://sys.stdout", - }, + }, + "handlers": { + "stdout": { + "class": "logging.StreamHandler", + "level": get_log_level(), + "formatter": get_log_formatter(), + "stream": "ext://sys.stdout", }, - "loggers": { - "root": { - "level": get_log_level(), - "handlers": [ - "stdout", - ], - }, + }, + "loggers": { + "gunicorn": { + "handlers": ["stdout"], + "level": "INFO", + "propagate": False, }, - } - - logging.config.dictConfig(config) - logging.getLogger("faker").setLevel(logging.ERROR) + "gunicorn.access": { + "handlers": ["stdout"], + "level": "INFO", + "propagate": False, + "filters": [GunicornAccessLoggerHealthProbeFilter()], + }, + "gunicorn.error": { + "handlers": ["stdout"], + "level": "INFO", + "propagate": False, + }, + }, + "root": { + "level": get_log_level(), + "handlers": [ + "stdout", + ], + }, + } diff --git a/tests/conftest.py b/tests/conftest.py index d32b3be7..e26dd83b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import concurrent import copy import functools +import logging import os import pathlib import shutil @@ -36,14 +37,12 @@ from pytest_mock import MockerFixture +logging.getLogger("faker").setLevel(logging.ERROR) DATADOC_METADATA_MODULE = "dapla_metadata.datasets" CODE_LIST_DIR = "code_list" STATISTICAL_SUBJECT_STRUCTURE_DIR = "statistical_subject_structure" -if TYPE_CHECKING: - from pytest_mock import MockerFixture - @pytest.fixture(autouse=True) def _clear_environment(mocker: MockerFixture) -> None: @@ -56,12 +55,12 @@ def faker_session_locale(): return ["no_NO"] -@pytest.fixture() +@pytest.fixture def dummy_timestamp() -> datetime: return datetime(2022, 1, 1, tzinfo=timezone.utc) -@pytest.fixture() +@pytest.fixture def _mock_timestamp(mocker: MockerFixture, dummy_timestamp: datetime) -> None: mocker.patch( DATADOC_METADATA_MODULE + ".core.get_timestamp_now", @@ -69,7 +68,7 @@ def _mock_timestamp(mocker: MockerFixture, dummy_timestamp: datetime) -> None: ) -@pytest.fixture() +@pytest.fixture def _mock_user_info(mocker: MockerFixture) -> None: mocker.patch( DATADOC_METADATA_MODULE + ".user_info.get_user_info_for_current_platform", @@ -77,7 +76,7 @@ def _mock_user_info(mocker: MockerFixture) -> None: ) -@pytest.fixture() +@pytest.fixture def metadata( _mock_timestamp: None, _mock_user_info: None, @@ -91,7 +90,7 @@ def metadata( ) -@pytest.fixture() +@pytest.fixture def metadata_illegal_shortnames( _mock_timestamp: None, _mock_user_info: None, @@ -108,7 +107,7 @@ def metadata_illegal_shortnames( ) -@pytest.fixture() +@pytest.fixture def existing_metadata_path() -> Path: return TEST_EXISTING_METADATA_DIRECTORY @@ -123,22 +122,22 @@ def _clear_state() -> None: pass -@pytest.fixture() +@pytest.fixture def english_name() -> str: return "English Name" -@pytest.fixture() +@pytest.fixture def bokmål_name() -> str: return "Bokmål navn" -@pytest.fixture() +@pytest.fixture def nynorsk_name() -> str: return "Nynorsk namn" -@pytest.fixture() +@pytest.fixture def language_object( english_name: str, bokmål_name: str, @@ -153,7 +152,7 @@ def language_object( ) -@pytest.fixture() +@pytest.fixture def full_dataset_state_path( path_parts_to_insert: str | list[str], ) -> pathlib.Path: @@ -177,7 +176,7 @@ def full_dataset_state_path( return pathlib.Path().joinpath(*new_path) -@pytest.fixture() +@pytest.fixture def subject_xml_file_path() -> pathlib.Path: return ( TEST_RESOURCES_DIRECTORY @@ -186,12 +185,12 @@ def subject_xml_file_path() -> pathlib.Path: ) -@pytest.fixture() +@pytest.fixture def thread_pool_executor() -> concurrent.futures.ThreadPoolExecutor: return concurrent.futures.ThreadPoolExecutor(max_workers=12) -@pytest.fixture() +@pytest.fixture def subject_mapping_fake_statistical_structure( _mock_fetch_statistical_structure, thread_pool_executor, @@ -199,7 +198,7 @@ def subject_mapping_fake_statistical_structure( return StatisticSubjectMapping(thread_pool_executor, "placeholder") -@pytest.fixture() +@pytest.fixture def _mock_fetch_statistical_structure( mocker, subject_xml_file_path: pathlib.Path, @@ -215,7 +214,7 @@ def fake_statistical_structure() -> ResultSet: ) -@pytest.fixture() +@pytest.fixture def _statistic_subject_mapping_fake_subjects( subject_mapping_fake_statistical_structure, ) -> None: @@ -223,7 +222,7 @@ def _statistic_subject_mapping_fake_subjects( state.statistic_subject_mapping.wait_for_external_result() -@pytest.fixture() +@pytest.fixture def subject_mapping_http_exception( requests_mock, exception_to_raise, @@ -236,22 +235,22 @@ def subject_mapping_http_exception( return StatisticSubjectMapping(thread_pool_executor, "http://test.some.url.com") -@pytest.fixture() +@pytest.fixture def code_list_csv_filepath_nb() -> pathlib.Path: return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nb.csv" -@pytest.fixture() +@pytest.fixture def code_list_csv_filepath_nn() -> pathlib.Path: return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nn.csv" -@pytest.fixture() +@pytest.fixture def code_list_csv_filepath_en() -> pathlib.Path: return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_en.csv" -@pytest.fixture() +@pytest.fixture def _mock_fetch_dataframe( mocker, code_list_csv_filepath_nb: pathlib.Path, @@ -272,12 +271,12 @@ def fake_code_list() -> dict[str, pd.DataFrame]: ) -@pytest.fixture() +@pytest.fixture def code_list_fake_structure(_mock_fetch_dataframe, thread_pool_executor) -> CodeList: return CodeList(thread_pool_executor, 100) -@pytest.fixture() +@pytest.fixture def _code_list_fake_classifications(code_list_fake_structure) -> None: state.measurement_units = code_list_fake_structure state.measurement_units.wait_for_external_result() @@ -292,7 +291,7 @@ def _code_list_fake_classifications(code_list_fake_structure) -> None: state.organisational_units.wait_for_external_result() -@pytest.fixture() +@pytest.fixture def copy_dataset_to_path( tmp_path: pathlib.Path, full_dataset_state_path: pathlib.Path, diff --git a/tests/frontend/callbacks/test_dataset_callbacks.py b/tests/frontend/callbacks/test_dataset_callbacks.py index 80a2a4e6..7f7f7bfc 100644 --- a/tests/frontend/callbacks/test_dataset_callbacks.py +++ b/tests/frontend/callbacks/test_dataset_callbacks.py @@ -37,17 +37,17 @@ DATASET_CALLBACKS_MODULE = "datadoc.frontend.callbacks.dataset" -@pytest.fixture() +@pytest.fixture def n_clicks_1(): return 1 -@pytest.fixture() +@pytest.fixture def file_path(): return "valid/path/to/file.json" -@pytest.fixture() +@pytest.fixture def file_path_without_dates(): return "valid/path/to/person_data_v1.parquet"