diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0f60ed7a..bf109844 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -82,7 +82,7 @@ jobs: - name: Start demo run: | git clone https://github.com/DIRACGrid/diracx-charts.git ../diracx-charts - ../diracx-charts/run_demo.sh --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD + ../diracx-charts/run_demo.sh --enable-open-telemetry --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD - name: Debugging information run: | DIRACX_DEMO_DIR=$PWD/../diracx-charts/.demo diff --git a/diracx-routers/pyproject.toml b/diracx-routers/pyproject.toml index 9f32ee40..e31edbca 100644 --- a/diracx-routers/pyproject.toml +++ b/diracx-routers/pyproject.toml @@ -26,6 +26,11 @@ dependencies = [ "httpx", "pydantic", "sqlalchemy", + "opentelemetry-api", + "opentelemetry-exporter-otlp", + "opentelemetry-instrumentation-fastapi", + "opentelemetry-instrumentation-logging", + "opentelemetry-sdk", ] dynamic = ["version"] diff --git a/diracx-routers/src/diracx/routers/__init__.py b/diracx-routers/src/diracx/routers/__init__.py index 18d5ab10..e39305c4 100644 --- a/diracx-routers/src/diracx/routers/__init__.py +++ b/diracx-routers/src/diracx/routers/__init__.py @@ -5,6 +5,7 @@ import os from collections.abc import AsyncGenerator from functools import partial +from logging import Formatter, StreamHandler from typing import Any, Awaitable, Callable, Iterable, TypeVar, cast import dotenv @@ -16,11 +17,11 @@ from fastapi.routing import APIRoute from pydantic import parse_raw_as +# from starlette.types import ASGIApp +from uvicorn.logging import AccessFormatter, DefaultFormatter + from diracx.core.config import ConfigSource -from diracx.core.exceptions import ( - DiracError, - DiracHttpResponse, -) +from diracx.core.exceptions import DiracError, DiracHttpResponse from diracx.core.extensions import select_from_extension from diracx.core.settings import 
def configure_logger():
    """Configure the console logging of diracx and uvicorn.

    Access logs come from uvicorn, which configures its own loggers
    (https://github.com/tiangolo/fastapi/discussions/7457).
    This function:

    * attaches a timestamped console handler to the ``diracx`` logger and
      sets its level to ``INFO``;
    * prefixes the format of the first handler of the ``uvicorn.access`` and
      ``uvicorn`` loggers with a timestamp.

    Configuring only the root logger is not enough, because uvicorn attaches
    its handlers directly to the ``uvicorn`` loggers.

    The function is idempotent: calling it several times in the same process
    neither stacks console handlers nor repeats the timestamp prefix.
    """
    console_handler_name = "diracx_console"
    timestamp_prefix = "%(asctime)s - "

    diracx_logger = logging.getLogger("diracx")
    # Only install the console handler once, otherwise every extra call
    # would duplicate each diracx log line on the console.
    already_installed = any(
        existing.get_name() == console_handler_name
        for existing in diracx_logger.handlers
    )
    if not already_installed:
        diracx_handler = StreamHandler()
        diracx_handler.set_name(console_handler_name)
        diracx_handler.setFormatter(
            Formatter("%(asctime)s - %(levelname)s - %(message)s")
        )
        diracx_logger.addHandler(diracx_handler)
    diracx_logger.setLevel("INFO")

    # Recreate the formatters of the uvicorn loggers, adding the timestamp
    for logger_name in ("uvicorn.access", "uvicorn"):
        handlers = logging.getLogger(logger_name).handlers
        if not handlers:
            # There may not be any handler defined, like in the CI
            continue

        # A handler without formatter has ``formatter is None``
        previous_fmt = getattr(handlers[0].formatter, "_fmt", None)
        if previous_fmt is None or previous_fmt.startswith(timestamp_prefix):
            # Nothing to extend, or the timestamp is already there
            continue

        # The uvicorn formatter classes are looked up only when they are
        # actually needed, i.e. when uvicorn did configure a handler.
        if logger_name == "uvicorn.access":
            formatter_cls = AccessFormatter
        else:
            formatter_cls = DefaultFormatter
        handlers[0].setFormatter(formatter_cls(timestamp_prefix + previous_fmt))
instrument_otel(app) + return app diff --git a/diracx-routers/src/diracx/routers/otel.py b/diracx-routers/src/diracx/routers/otel.py new file mode 100644 index 00000000..37e12630 --- /dev/null +++ b/diracx-routers/src/diracx/routers/otel.py @@ -0,0 +1,116 @@ +import logging +import os + +from fastapi import FastAPI + +# https://opentelemetry.io/blog/2023/logs-collection/ +# https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py +from opentelemetry import _logs, metrics, trace +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter +from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor +from opentelemetry.instrumentation.logging import LoggingInstrumentor +from opentelemetry.instrumentation.logging.constants import DEFAULT_LOGGING_FORMAT +from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +from diracx.core.settings import ServiceSettingsBase + + +class OTELSettings(ServiceSettingsBase, env_prefix="DIRACX_OTEL_"): + """Settings for the Open Telemetry Configuration.""" + + enabled: bool = False + application_name: str = "diracx" + grpc_endpoint: str = "" + grpc_insecure: bool = True + + +def instrument_otel(app: FastAPI) -> None: + """ + Instrument the application to send OpenTelemetryData. + Metrics, Traces and Logs are sent to an OTEL collector. + The Collector can then redirect it to whatever is configured. 
+ Typically: Jaeger for traces, Prometheus for metrics, ElasticSearch for logs + + Note: this is highly experimental, and OpenTelemetry is a quickly moving target + + """ + + otel_settings = OTELSettings() + if not otel_settings.enabled: + return + + # set the service name to show in traces + resource = Resource.create( + attributes={ + "service.name": otel_settings.application_name, + "service.instance.id": os.uname().nodename, + } + ) + + # set the tracer provider + tracer_provider = TracerProvider(resource=resource) + + # elif MODE == "otel-collector-http": + # tracer.add_span_processor( + # BatchSpanProcessor(OTLPSpanExporterHTTP(endpoint=OTEL_HTTP_ENDPOINT)) + # ) + # else: + # default otel-collector-grpc + tracer_provider.add_span_processor( + BatchSpanProcessor( + OTLPSpanExporter( + endpoint=otel_settings.grpc_endpoint, + insecure=otel_settings.grpc_insecure, + ) + ) + ) + trace.set_tracer_provider(tracer_provider) + + # metric_reader = PeriodicExportingMetricReader(ConsoleMetricExporter(),export_interval_millis=1000) + metric_reader = PeriodicExportingMetricReader( + OTLPMetricExporter( + endpoint=otel_settings.grpc_endpoint, + insecure=otel_settings.grpc_insecure, + ), + export_interval_millis=3000, + ) + meter_provider = MeterProvider(metric_readers=[metric_reader]) + metrics.set_meter_provider(meter_provider) + + ################################### + + # # override logger format which with trace id and span id + # https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py + + LoggingInstrumentor().instrument(set_logging_format=False) + + logger_provider = LoggerProvider(resource=resource) + _logs.set_logger_provider(logger_provider) + + otlp_exporter = OTLPLogExporter( + endpoint=otel_settings.grpc_endpoint, + insecure=otel_settings.grpc_insecure, + ) + logger_provider.add_log_record_processor(BatchLogRecordProcessor(otlp_exporter)) + handler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider) + 
handler.setFormatter(logging.Formatter(DEFAULT_LOGGING_FORMAT)) + # Add the handler to diracx and all uvicorn logger + # Note adding it to just 'uvicorn' or the root logger + # is not enough because uvicorn sets propagate=False + for logger_name in logging.root.manager.loggerDict: + if "diracx" == logger_name or "uvicorn" in logger_name: + logging.getLogger(logger_name).addHandler(handler) + + #################### + + FastAPIInstrumentor.instrument_app( + app, tracer_provider=tracer_provider, meter_provider=meter_provider + ) diff --git a/docs/OPENTELEMETRY.md b/docs/OPENTELEMETRY.md new file mode 100644 index 00000000..a209c805 --- /dev/null +++ b/docs/OPENTELEMETRY.md @@ -0,0 +1,12 @@ +# OpenTelemetry + +> :warning: **Experimental**: opentelemetry is an evolving product, and so is our implementation of it. + +``diracx`` is capable of sending [OpenTelemetry](https://opentelemetry.io/) data to a collector. The settings are controled by the +``diracx.routers.otel.OTELSettings`` classes + +``diracx`` will then export metrics, traces, and logs. For the moment, nothing is really instrumented, but the infrastructure is there + +![OTEL Logs](./otel-logs.png) +![OTEL Metrics](./otel-metrics.png) +![OTEL Traces](./otel-traces.png) diff --git a/docs/otel-logs.png b/docs/otel-logs.png new file mode 100644 index 00000000..c92af58c Binary files /dev/null and b/docs/otel-logs.png differ diff --git a/docs/otel-metrics.png b/docs/otel-metrics.png new file mode 100644 index 00000000..0bfb74e8 Binary files /dev/null and b/docs/otel-metrics.png differ diff --git a/docs/otel-traces.png b/docs/otel-traces.png new file mode 100644 index 00000000..8d83d227 Binary files /dev/null and b/docs/otel-traces.png differ