From 0a6b0339ea10a2fd9dd9d731c58fdfc818c7f273 Mon Sep 17 00:00:00 2001
From: Miles Mason Winther <42948872+mmwinther@users.noreply.github.com>
Date: Mon, 2 Dec 2024 16:03:03 +0100
Subject: [PATCH] Unify the logging config so that JSON logging works

---
 gunicorn.conf.py                              | 35 +------
 pyproject.toml                                |  3 +
 src/datadoc/app.py                            |  4 +-
 src/datadoc/config.py                         | 16 +---
 .../gunicorn_access_log_filter.py             | 11 +++
 .../logging_configuration/logging_config.py   | 96 +++++++++++--------
 tests/conftest.py                             | 53 +++++-----
 .../callbacks/test_dataset_callbacks.py       |  6 +-
 8 files changed, 103 insertions(+), 121 deletions(-)
 create mode 100644 src/datadoc/logging_configuration/gunicorn_access_log_filter.py

diff --git a/gunicorn.conf.py b/gunicorn.conf.py
index dbf68b39..a9ed4fe2 100644
--- a/gunicorn.conf.py
+++ b/gunicorn.conf.py
@@ -1,40 +1,9 @@
 """Configuration for the Gunicorn server."""
 
-import sys
+from datadoc.logging_configuration.logging_config import get_log_config
 
 bind = "0.0.0.0:8050"
 workers = 1
 loglevel = "info"
 preload = True
-
-logconfig_dict = GUNICORN_LOG_CONFIG = {
-    "handlers": {
-        "console_stdout": {
-            "level": "DEBUG",
-            "class": "logging.StreamHandler",
-            "stream": sys.stdout,
-        },
-    },
-    "loggers": {
-        "": {"handlers": ["console_stdout"], "level": "INFO", "propagate": False},
-        "gunicorn": {
-            "handlers": ["console_stdout"],
-            "level": "INFO",
-            "propagate": False,
-        },
-        "gunicorn.access": {
-            "handlers": ["console_stdout"],
-            "level": "INFO",
-            "propagate": False,
-        },
-        "gunicorn.error": {
-            "handlers": ["console_stdout"],
-            "level": "INFO",
-            "propagate": False,
-        },
-    },
-    "root": {
-        "level": "INFO",
-        "handlers": ["console_stdout"],
-    },
-}
+logconfig_dict = get_log_config()
diff --git a/pyproject.toml b/pyproject.toml
index 58c58f8b..ffc27fe2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -161,6 +161,9 @@ max-complexity = 15
 [tool.ruff.lint.pydocstyle]
 convention = "google" # You can also use "numpy".
 
+[tool.ruff.lint.flake8-pytest-style]
+fixture-parentheses = false
+
 [tool.ruff.lint.pep8-naming]
 classmethod-decorators = [
     "classmethod",
diff --git a/src/datadoc/app.py b/src/datadoc/app.py
index 981e9f0f..2b0030be 100644
--- a/src/datadoc/app.py
+++ b/src/datadoc/app.py
@@ -24,12 +24,12 @@
 from datadoc.frontend.components.control_bars import build_controls_bar
 from datadoc.frontend.components.control_bars import build_footer_control_bar
 from datadoc.frontend.components.control_bars import header
-from datadoc.logging_configuration.logging_config import configure_logging
+from datadoc.logging_configuration.logging_config import get_log_config
 from datadoc.utils import get_app_version
 from datadoc.utils import pick_random_port
 from datadoc.utils import running_in_notebook
 
-configure_logging()
+logging.config.dictConfig(get_log_config())
 logger = logging.getLogger(__name__)
 
 
diff --git a/src/datadoc/config.py b/src/datadoc/config.py
index 1039d250..015e6af3 100644
--- a/src/datadoc/config.py
+++ b/src/datadoc/config.py
@@ -2,24 +2,16 @@
 
 from __future__ import annotations
 
-import logging
 import os
-import sys
 from pathlib import Path
-from pprint import pformat
 from typing import Literal
 
 from dapla_metadata.datasets import enums
-from dotenv import dotenv_values
 from dotenv import load_dotenv
 
 from datadoc.constants import DAPLA_MANUAL_TEXT
 from datadoc.frontend.components.builders import build_link_object
 
-logging.basicConfig(level=logging.DEBUG, force=True, stream=sys.stdout)
-
-logger = logging.getLogger(__name__)
-
 DOT_ENV_FILE_PATH = Path(__file__).parent.joinpath(".env")
 
 JUPYTERHUB_USER = "JUPYTERHUB_USER"
@@ -34,18 +26,12 @@ def _load_dotenv_file() -> None:
     if not env_loaded and DOT_ENV_FILE_PATH.exists():
         load_dotenv(DOT_ENV_FILE_PATH)
         env_loaded = True
-        logger.info(
-            "Loaded .env file with config keys: \n%s",
-            pformat(list(dotenv_values(DOT_ENV_FILE_PATH).keys())),
-        )
 
 
 def _get_config_item(item: str) -> str | None:
     """Get a config item. Makes sure all access is logged."""
     _load_dotenv_file()
-    value = os.getenv(item)
-    logger.debug("Config accessed. %s", item)
-    return value
+    return os.getenv(item)
 
 
 def get_jupyterhub_user() -> str | None:
diff --git a/src/datadoc/logging_configuration/gunicorn_access_log_filter.py b/src/datadoc/logging_configuration/gunicorn_access_log_filter.py
new file mode 100644
index 00000000..36552fd4
--- /dev/null
+++ b/src/datadoc/logging_configuration/gunicorn_access_log_filter.py
@@ -0,0 +1,11 @@
+import logging
+
+EXCLUDED_PATHS = ["/healthz", "/_dash-", "/assets"]
+
+
+class GunicornAccessLoggerHealthProbeFilter(logging.Filter):
+    """Filter out any Gunicorn access logs on Liveness or Readiness probes."""
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Filter health probes on the /healthz endpoints."""
+        return all(path not in record.getMessage() for path in EXCLUDED_PATHS)
diff --git a/src/datadoc/logging_configuration/logging_config.py b/src/datadoc/logging_configuration/logging_config.py
index 19919cb9..2edcfeb1 100644
--- a/src/datadoc/logging_configuration/logging_config.py
+++ b/src/datadoc/logging_configuration/logging_config.py
@@ -1,55 +1,69 @@
 from __future__ import annotations
 
-import logging.config
 from typing import Any
 
 from datadoc.config import get_log_formatter
 from datadoc.config import get_log_level
+from datadoc.logging_configuration.gunicorn_access_log_filter import (
+    GunicornAccessLoggerHealthProbeFilter,
+)
 
 
-def configure_logging(config: dict[str, Any] | None = None) -> None:
+def get_log_config() -> dict[str, Any]:
     """Configure logging for the application."""
-    if not config:
-        config = {
-            "version": 1,
-            "disable_existing_loggers": False,
-            "formatters": {
-                "simple": {
-                    "format": "%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s",
+    return {
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "simple": {
+                "format": "%(asctime)s.%(msecs)03d %(levelname)s %(name)s: %(message)s",
+                "logger": "name",
+                "datefmt": "%Y-%m-%d %H:%M:%S",
+            },
+            "json": {
+                "()": "datadoc.logging_configuration.json_formatter.DatadocJSONFormatter",
+                "fmt_keys": {
+                    "level": "levelname",
+                    "message": "message",
+                    "timestamp": "timestamp",
                     "logger": "name",
-                    "datefmt": "%Y-%m-%d %H:%M:%S",
-                },
-                "json": {
-                    "()": "datadoc.logging_configuration.json_formatter.DatadocJSONFormatter",
-                    "fmt_keys": {
-                        "level": "levelname",
-                        "message": "message",
-                        "timestamp": "timestamp",
-                        "logger": "name",
-                        "module": "module",
-                        "function": "funcName",
-                        "line": "lineno",
-                        "thread_name": "threadName",
-                    },
+                    "module": "module",
+                    "function": "funcName",
+                    "line": "lineno",
+                    "thread_name": "threadName",
                 },
             },
-            "handlers": {
-                "stdout": {
-                    "class": "logging.StreamHandler",
-                    "level": get_log_level(),
-                    "formatter": get_log_formatter(),
-                    "stream": "ext://sys.stdout",
-                },
+        },
+        "handlers": {
+            "stdout": {
+                "class": "logging.StreamHandler",
+                "level": get_log_level(),
+                "formatter": get_log_formatter(),
+                "stream": "ext://sys.stdout",
             },
-            "loggers": {
-                "root": {
-                    "level": get_log_level(),
-                    "handlers": [
-                        "stdout",
-                    ],
-                },
+        },
+        "loggers": {
+            "gunicorn": {
+                "handlers": ["stdout"],
+                "level": "INFO",
+                "propagate": False,
             },
-        }
-
-    logging.config.dictConfig(config)
-    logging.getLogger("faker").setLevel(logging.ERROR)
+            "gunicorn.access": {
+                "handlers": ["stdout"],
+                "level": "INFO",
+                "propagate": False,
+                "filters": [GunicornAccessLoggerHealthProbeFilter()],
+            },
+            "gunicorn.error": {
+                "handlers": ["stdout"],
+                "level": "INFO",
+                "propagate": False,
+            },
+        },
+        "root": {
+            "level": get_log_level(),
+            "handlers": [
+                "stdout",
+            ],
+        },
+    }
diff --git a/tests/conftest.py b/tests/conftest.py
index d32b3be7..e26dd83b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -5,6 +5,7 @@
 import concurrent
 import copy
 import functools
+import logging
 import os
 import pathlib
 import shutil
@@ -36,14 +37,12 @@
 
     from pytest_mock import MockerFixture
 
+logging.getLogger("faker").setLevel(logging.ERROR)
 
 DATADOC_METADATA_MODULE = "dapla_metadata.datasets"
 CODE_LIST_DIR = "code_list"
 STATISTICAL_SUBJECT_STRUCTURE_DIR = "statistical_subject_structure"
 
-if TYPE_CHECKING:
-    from pytest_mock import MockerFixture
-
 
 @pytest.fixture(autouse=True)
 def _clear_environment(mocker: MockerFixture) -> None:
@@ -56,12 +55,12 @@ def faker_session_locale():
     return ["no_NO"]
 
 
-@pytest.fixture()
+@pytest.fixture
 def dummy_timestamp() -> datetime:
     return datetime(2022, 1, 1, tzinfo=timezone.utc)
 
 
-@pytest.fixture()
+@pytest.fixture
 def _mock_timestamp(mocker: MockerFixture, dummy_timestamp: datetime) -> None:
     mocker.patch(
         DATADOC_METADATA_MODULE + ".core.get_timestamp_now",
@@ -69,7 +68,7 @@ def _mock_timestamp(mocker: MockerFixture, dummy_timestamp: datetime) -> None:
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def _mock_user_info(mocker: MockerFixture) -> None:
     mocker.patch(
         DATADOC_METADATA_MODULE + ".user_info.get_user_info_for_current_platform",
@@ -77,7 +76,7 @@ def _mock_user_info(mocker: MockerFixture) -> None:
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def metadata(
     _mock_timestamp: None,
     _mock_user_info: None,
@@ -91,7 +90,7 @@ def metadata(
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def metadata_illegal_shortnames(
     _mock_timestamp: None,
     _mock_user_info: None,
@@ -108,7 +107,7 @@ def metadata_illegal_shortnames(
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def existing_metadata_path() -> Path:
     return TEST_EXISTING_METADATA_DIRECTORY
 
@@ -123,22 +122,22 @@ def _clear_state() -> None:
         pass
 
 
-@pytest.fixture()
+@pytest.fixture
 def english_name() -> str:
     return "English Name"
 
 
-@pytest.fixture()
+@pytest.fixture
 def bokmål_name() -> str:
     return "Bokmål navn"
 
 
-@pytest.fixture()
+@pytest.fixture
 def nynorsk_name() -> str:
     return "Nynorsk namn"
 
 
-@pytest.fixture()
+@pytest.fixture
 def language_object(
     english_name: str,
     bokmål_name: str,
@@ -153,7 +152,7 @@ def language_object(
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def full_dataset_state_path(
     path_parts_to_insert: str | list[str],
 ) -> pathlib.Path:
@@ -177,7 +176,7 @@ def full_dataset_state_path(
     return pathlib.Path().joinpath(*new_path)
 
 
-@pytest.fixture()
+@pytest.fixture
 def subject_xml_file_path() -> pathlib.Path:
     return (
         TEST_RESOURCES_DIRECTORY
@@ -186,12 +185,12 @@ def subject_xml_file_path() -> pathlib.Path:
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def thread_pool_executor() -> concurrent.futures.ThreadPoolExecutor:
     return concurrent.futures.ThreadPoolExecutor(max_workers=12)
 
 
-@pytest.fixture()
+@pytest.fixture
 def subject_mapping_fake_statistical_structure(
     _mock_fetch_statistical_structure,
     thread_pool_executor,
@@ -199,7 +198,7 @@ def subject_mapping_fake_statistical_structure(
     return StatisticSubjectMapping(thread_pool_executor, "placeholder")
 
 
-@pytest.fixture()
+@pytest.fixture
 def _mock_fetch_statistical_structure(
     mocker,
     subject_xml_file_path: pathlib.Path,
@@ -215,7 +214,7 @@ def fake_statistical_structure() -> ResultSet:
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def _statistic_subject_mapping_fake_subjects(
     subject_mapping_fake_statistical_structure,
 ) -> None:
@@ -223,7 +222,7 @@ def _statistic_subject_mapping_fake_subjects(
     state.statistic_subject_mapping.wait_for_external_result()
 
 
-@pytest.fixture()
+@pytest.fixture
 def subject_mapping_http_exception(
     requests_mock,
     exception_to_raise,
@@ -236,22 +235,22 @@ def subject_mapping_http_exception(
     return StatisticSubjectMapping(thread_pool_executor, "http://test.some.url.com")
 
 
-@pytest.fixture()
+@pytest.fixture
 def code_list_csv_filepath_nb() -> pathlib.Path:
     return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nb.csv"
 
 
-@pytest.fixture()
+@pytest.fixture
 def code_list_csv_filepath_nn() -> pathlib.Path:
     return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_nn.csv"
 
 
-@pytest.fixture()
+@pytest.fixture
 def code_list_csv_filepath_en() -> pathlib.Path:
     return TEST_RESOURCES_DIRECTORY / CODE_LIST_DIR / "code_list_en.csv"
 
 
-@pytest.fixture()
+@pytest.fixture
 def _mock_fetch_dataframe(
     mocker,
     code_list_csv_filepath_nb: pathlib.Path,
@@ -272,12 +271,12 @@ def fake_code_list() -> dict[str, pd.DataFrame]:
     )
 
 
-@pytest.fixture()
+@pytest.fixture
 def code_list_fake_structure(_mock_fetch_dataframe, thread_pool_executor) -> CodeList:
     return CodeList(thread_pool_executor, 100)
 
 
-@pytest.fixture()
+@pytest.fixture
 def _code_list_fake_classifications(code_list_fake_structure) -> None:
     state.measurement_units = code_list_fake_structure
     state.measurement_units.wait_for_external_result()
@@ -292,7 +291,7 @@ def _code_list_fake_classifications(code_list_fake_structure) -> None:
     state.organisational_units.wait_for_external_result()
 
 
-@pytest.fixture()
+@pytest.fixture
 def copy_dataset_to_path(
     tmp_path: pathlib.Path,
     full_dataset_state_path: pathlib.Path,
diff --git a/tests/frontend/callbacks/test_dataset_callbacks.py b/tests/frontend/callbacks/test_dataset_callbacks.py
index 80a2a4e6..7f7f7bfc 100644
--- a/tests/frontend/callbacks/test_dataset_callbacks.py
+++ b/tests/frontend/callbacks/test_dataset_callbacks.py
@@ -37,17 +37,17 @@
 DATASET_CALLBACKS_MODULE = "datadoc.frontend.callbacks.dataset"
 
 
-@pytest.fixture()
+@pytest.fixture
 def n_clicks_1():
     return 1
 
 
-@pytest.fixture()
+@pytest.fixture
 def file_path():
     return "valid/path/to/file.json"
 
 
-@pytest.fixture()
+@pytest.fixture
 def file_path_without_dates():
     return "valid/path/to/person_data_v1.parquet"