Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

telemetry #99

Merged
merged 1 commit into from
Jun 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
- name: Start demo
run: |
git clone https://github.com/DIRACGrid/diracx-charts.git ../diracx-charts
../diracx-charts/run_demo.sh --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD
../diracx-charts/run_demo.sh --enable-open-telemetry --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD
- name: Debugging information
run: |
DIRACX_DEMO_DIR=$PWD/../diracx-charts/.demo
Expand Down
5 changes: 5 additions & 0 deletions diracx-routers/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ dependencies = [
"httpx",
"pydantic",
"sqlalchemy",
"opentelemetry-api",
"opentelemetry-exporter-otlp",
"opentelemetry-instrumentation-fastapi",
"opentelemetry-instrumentation-logging",
"opentelemetry-sdk",
]
dynamic = ["version"]

Expand Down
52 changes: 48 additions & 4 deletions diracx-routers/src/diracx/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
from collections.abc import AsyncGenerator
from functools import partial
from logging import Formatter, StreamHandler
from typing import Any, Awaitable, Callable, Iterable, TypeVar, cast

import dotenv
Expand All @@ -16,11 +17,11 @@
from fastapi.routing import APIRoute
from pydantic import parse_raw_as

# from starlette.types import ASGIApp
from uvicorn.logging import AccessFormatter, DefaultFormatter

from diracx.core.config import ConfigSource
from diracx.core.exceptions import (
DiracError,
DiracHttpResponse,
)
from diracx.core.exceptions import DiracError, DiracHttpResponse
from diracx.core.extensions import select_from_extension
from diracx.core.settings import ServiceSettingsBase
from diracx.core.utils import dotenv_files_from_environment
Expand All @@ -30,6 +31,7 @@

from .auth import verify_dirac_access_token
from .fastapi_classes import DiracFastAPI, DiracxRouter
from .otel import instrument_otel

T = TypeVar("T")
T2 = TypeVar("T2", bound=BaseSQLDB | BaseOSDB)
Expand All @@ -38,6 +40,45 @@
logger = logging.getLogger(__name__)


###########################################


def configure_logger():
    """Configure the console logger.

    Access logs come from uvicorn, which configures its loggers in its own way
    (https://github.com/tiangolo/fastapi/discussions/7457).
    This function defines a console handler for all the diracx loggers and
    adds a timestamp to the uvicorn output.
    We cannot configure just the root handler, as uvicorn attaches handlers
    to the `uvicorn` logger.

    Calling this function several times is harmless: the console handler is
    attached to the `diracx` logger at most once, and the timestamp is never
    prepended twice to the uvicorn formats.
    """
    console_handler_name = "diracx-console"

    diracx_logger = logging.getLogger("diracx")
    # Without this guard every call would add one more StreamHandler and each
    # diracx log line would be printed once per call.
    if not any(h.name == console_handler_name for h in diracx_logger.handlers):
        diracx_handler = StreamHandler()
        diracx_handler.set_name(console_handler_name)
        diracx_handler.setFormatter(
            Formatter("%(asctime)s - %(levelname)s - %(message)s")
        )
        diracx_logger.addHandler(diracx_handler)
    diracx_logger.setLevel("INFO")

    # Recreate the formatters for the uvicorn loggers adding the timestamp.
    # The uvicorn formatter classes are only used when a handler actually
    # needs to be reformatted.
    access_target = _handler_needing_timestamp("uvicorn.access")
    if access_target is not None:
        access_handler, access_format = access_target
        access_handler.setFormatter(AccessFormatter(access_format))

    default_target = _handler_needing_timestamp("uvicorn")
    if default_target is not None:
        default_handler, default_format = default_target
        default_handler.setFormatter(DefaultFormatter(default_format))


def _handler_needing_timestamp(logger_name):
    """Return ``(handler, new_format)`` for the first handler of a logger.

    ``new_format`` is the current format of that handler prefixed with
    ``%(asctime)s - ``.

    Returns ``None`` when there is nothing to reformat:

    * the logger has no handler (like in the CI),
    * the first handler has no formatter,
    * the format already starts with the timestamp.
    """
    timestamp_prefix = "%(asctime)s"

    handlers = logging.getLogger(logger_name).handlers
    if not handlers:
        return None

    first_handler = handlers[0]
    # `_fmt` is a private attribute of logging.Formatter; getattr also covers
    # the case where the handler has no formatter at all (formatter is None).
    previous_fmt = getattr(first_handler.formatter, "_fmt", None)
    if not previous_fmt or previous_fmt.startswith(timestamp_prefix):
        return None

    return first_handler, f"{timestamp_prefix} - {previous_fmt}"


# Rules:
# All routes must have tags (needed for auto gen of client)
# Form headers must have a description (autogen)
Expand Down Expand Up @@ -189,6 +230,9 @@ def create_app_inner(
allow_headers=["*"],
)

configure_logger()
instrument_otel(app)

return app


Expand Down
116 changes: 116 additions & 0 deletions diracx-routers/src/diracx/routers/otel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import logging
import os

from fastapi import FastAPI

# https://opentelemetry.io/blog/2023/logs-collection/
# https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py
from opentelemetry import _logs, metrics, trace
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.logging.constants import DEFAULT_LOGGING_FORMAT
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from diracx.core.settings import ServiceSettingsBase


class OTELSettings(ServiceSettingsBase, env_prefix="DIRACX_OTEL_"):
    """Settings for the OpenTelemetry configuration.

    The class is declared with ``env_prefix="DIRACX_OTEL_"``.
    """

    # Telemetry is opt-in: instrument_otel() returns immediately while False.
    enabled: bool = False

    # Reported as the "service.name" attribute of the OpenTelemetry resource.
    application_name: str = "diracx"

    # Passed as `endpoint` to the OTLP gRPC span, metric and log exporters.
    grpc_endpoint: str = ""

    # Passed as `insecure` to the OTLP gRPC span, metric and log exporters.
    grpc_insecure: bool = True


def instrument_otel(app: FastAPI) -> None:
    """Instrument the application to send OpenTelemetry data.

    Metrics, traces and logs are sent over gRPC to an OTEL collector.
    The collector can then redirect them to whatever is configured.
    Typically: Jaeger for traces, Prometheus for metrics, ElasticSearch for logs.

    Nothing is done unless ``OTELSettings().enabled`` is true.

    Note: this is highly experimental, and OpenTelemetry is a quickly moving target.

    Args:
        app: the FastAPI application to instrument.
    """
    # Function-scope import: platform.node() is the portable replacement for
    # os.uname().nodename, which does not exist on Windows.
    import platform

    otel_settings = OTELSettings()
    if not otel_settings.enabled:
        return

    # Identify the service; the same resource is shared by traces, metrics
    # and logs so that the three signals can be correlated.
    resource = Resource.create(
        attributes={
            "service.name": otel_settings.application_name,
            "service.instance.id": platform.node(),
        }
    )

    # All three exporters talk to the same collector with the same options.
    exporter_options = {
        "endpoint": otel_settings.grpc_endpoint,
        "insecure": otel_settings.grpc_insecure,
    }

    # --- Traces (only the OTLP gRPC exporter is wired for now) ---
    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(**exporter_options))
    )
    trace.set_tracer_provider(tracer_provider)

    # --- Metrics ---
    metric_reader = PeriodicExportingMetricReader(
        OTLPMetricExporter(**exporter_options),
        export_interval_millis=3000,
    )
    # The resource must be given here as well, otherwise the exported metrics
    # do not carry the service name / instance id.
    meter_provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
    metrics.set_meter_provider(meter_provider)

    # --- Logs ---
    # Inject trace id and span id in the log records, without touching the
    # logging format
    # https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py
    LoggingInstrumentor().instrument(set_logging_format=False)

    logger_provider = LoggerProvider(resource=resource)
    _logs.set_logger_provider(logger_provider)
    logger_provider.add_log_record_processor(
        BatchLogRecordProcessor(OTLPLogExporter(**exporter_options))
    )

    handler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider)
    handler.setFormatter(logging.Formatter(DEFAULT_LOGGING_FORMAT))
    # Add the handler to diracx and all uvicorn loggers.
    # Note adding it to just 'uvicorn' or the root logger
    # is not enough because uvicorn sets propagate=False.
    # Sorting visits a parent ("uvicorn") before its children
    # ("uvicorn.error"), so a child that still propagates to an already
    # equipped parent is skipped instead of exporting every record twice.
    for logger_name in sorted(logging.root.manager.loggerDict):
        if logger_name == "diracx" or "uvicorn" in logger_name:
            target_logger = logging.getLogger(logger_name)
            if not _handler_is_reachable(target_logger, handler):
                target_logger.addHandler(handler)

    # --- FastAPI request instrumentation ---
    FastAPIInstrumentor.instrument_app(
        app, tracer_provider=tracer_provider, meter_provider=meter_provider
    )


def _handler_is_reachable(logger: logging.Logger, handler: logging.Handler) -> bool:
    """Tell whether records emitted on *logger* already reach *handler*.

    Walks from *logger* up through its parents, stopping at the first logger
    that does not propagate.
    """
    current = logger
    while current is not None:
        if handler in current.handlers:
            return True
        if not current.propagate:
            return False
        current = current.parent
    return False
12 changes: 12 additions & 0 deletions docs/OPENTELEMETRY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# OpenTelemetry

> :warning: **Experimental**: opentelemetry is an evolving product, and so is our implementation of it.

``diracx`` is capable of sending [OpenTelemetry](https://opentelemetry.io/) data to a collector. The settings are controlled by the
``diracx.routers.otel.OTELSettings`` class.

``diracx`` will then export metrics, traces, and logs. For the moment, nothing is really instrumented, but the infrastructure is there.

![OTEL Logs](./otel-logs.png)
![OTEL Metrics](./otel-metrics.png)
![OTEL Traces](./otel-traces.png)
Binary file added docs/otel-logs.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/otel-metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/otel-traces.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.