Skip to content

Commit

Permalink
Merge pull request #99 from chaen/telemetry
Browse files Browse the repository at this point in the history
telemetry
  • Loading branch information
chrisburr authored Jun 3, 2024
2 parents 608487d + 186d1e7 commit a0b0af6
Show file tree
Hide file tree
Showing 8 changed files with 182 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ jobs:
- name: Start demo
run: |
git clone https://github.com/DIRACGrid/diracx-charts.git ../diracx-charts
../diracx-charts/run_demo.sh --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD
../diracx-charts/run_demo.sh --enable-open-telemetry --enable-coverage --exit-when-done --set-value developer.autoReload=false $PWD
- name: Debugging information
run: |
DIRACX_DEMO_DIR=$PWD/../diracx-charts/.demo
Expand Down
5 changes: 5 additions & 0 deletions diracx-routers/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ dependencies = [
"httpx",
"pydantic",
"sqlalchemy",
"opentelemetry-api",
"opentelemetry-exporter-otlp",
"opentelemetry-instrumentation-fastapi",
"opentelemetry-instrumentation-logging",
"opentelemetry-sdk",
]
dynamic = ["version"]

Expand Down
52 changes: 48 additions & 4 deletions diracx-routers/src/diracx/routers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
from collections.abc import AsyncGenerator
from functools import partial
from logging import Formatter, StreamHandler
from typing import Any, Awaitable, Callable, Iterable, TypeVar, cast

import dotenv
Expand All @@ -16,11 +17,11 @@
from fastapi.routing import APIRoute
from pydantic import parse_raw_as

# from starlette.types import ASGIApp
from uvicorn.logging import AccessFormatter, DefaultFormatter

from diracx.core.config import ConfigSource
from diracx.core.exceptions import (
DiracError,
DiracHttpResponse,
)
from diracx.core.exceptions import DiracError, DiracHttpResponse
from diracx.core.extensions import select_from_extension
from diracx.core.settings import ServiceSettingsBase
from diracx.core.utils import dotenv_files_from_environment
Expand All @@ -30,6 +31,7 @@

from .auth import verify_dirac_access_token
from .fastapi_classes import DiracFastAPI, DiracxRouter
from .otel import instrument_otel

T = TypeVar("T")
T2 = TypeVar("T2", bound=BaseSQLDB | BaseOSDB)
Expand All @@ -38,6 +40,45 @@
logger = logging.getLogger(__name__)


###########################################


def configure_logger():
    """Configure console logging for diracx and timestamp the uvicorn logs.

    Access logs come from uvicorn, which configures its own loggers
    (https://github.com/tiangolo/fastapi/discussions/7457).
    Configuring only the root logger is not enough, because uvicorn
    attaches its handlers directly to the ``uvicorn`` loggers.
    This function therefore:

    * attaches a timestamped console handler to the ``diracx`` logger and
      sets that logger's level to ``INFO``;
    * prefixes the format of the first handler of the ``uvicorn.access``
      and ``uvicorn`` loggers with ``%(asctime)s``.

    The function is idempotent: calling it again (for example when several
    applications are created in the same process) neither adds a second
    console handler nor prepends the timestamp twice.
    """
    handler_name = "diracx-console"
    timestamp_prefix = "%(asctime)s - "

    diracx_logger = logging.getLogger("diracx")
    # The handler is named so that a later call can recognise it and avoid
    # emitting every diracx record twice.
    if not any(h.get_name() == handler_name for h in diracx_logger.handlers):
        diracx_handler = StreamHandler()
        diracx_handler.set_name(handler_name)
        diracx_handler.setFormatter(
            Formatter("%(asctime)s - %(levelname)s - %(message)s")
        )
        diracx_logger.addHandler(diracx_handler)
    diracx_logger.setLevel("INFO")

    # Recreate the formatters for the uvicorn loggers adding the timestamp
    for logger_name in ("uvicorn.access", "uvicorn"):
        handlers = logging.getLogger(logger_name).handlers
        # There may not be any handler defined, like in the CI
        if not handlers:
            continue

        # A handler may have no formatter at all; leave it untouched instead
        # of failing on the attribute access.
        previous_fmt = getattr(handlers[0].formatter, "_fmt", None)
        if previous_fmt is None or previous_fmt.startswith(timestamp_prefix):
            continue

        if logger_name == "uvicorn.access":
            formatter_class = AccessFormatter
        else:
            formatter_class = DefaultFormatter
        handlers[0].setFormatter(formatter_class(f"{timestamp_prefix}{previous_fmt}"))


# Rules:
# All routes must have tags (needed for auto gen of client)
# Form headers must have a description (autogen)
Expand Down Expand Up @@ -189,6 +230,9 @@ def create_app_inner(
allow_headers=["*"],
)

configure_logger()
instrument_otel(app)

return app


Expand Down
116 changes: 116 additions & 0 deletions diracx-routers/src/diracx/routers/otel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import logging
import os

from fastapi import FastAPI

# https://opentelemetry.io/blog/2023/logs-collection/
# https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py
from opentelemetry import _logs, metrics, trace
from opentelemetry.exporter.otlp.proto.grpc._log_exporter import OTLPLogExporter
from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
from opentelemetry.instrumentation.fastapi import FastAPIInstrumentor
from opentelemetry.instrumentation.logging import LoggingInstrumentor
from opentelemetry.instrumentation.logging.constants import DEFAULT_LOGGING_FORMAT
from opentelemetry.sdk._logs import LoggerProvider, LoggingHandler
from opentelemetry.sdk._logs.export import BatchLogRecordProcessor
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.resources import Resource
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

from diracx.core.settings import ServiceSettingsBase


class OTELSettings(ServiceSettingsBase, env_prefix="DIRACX_OTEL_"):
    """Settings for the Open Telemetry Configuration.

    These values are read by ``instrument_otel``.
    NOTE(review): presumably each field is loaded from an environment
    variable carrying the ``DIRACX_OTEL_`` prefix; that behaviour lives in
    ``ServiceSettingsBase`` -- confirm there.
    """

    # When false (the default), ``instrument_otel`` returns without doing anything
    enabled: bool = False
    # Reported as the ``service.name`` resource attribute
    application_name: str = "diracx"
    # Passed as ``endpoint`` to the OTLP gRPC span, metric and log exporters
    grpc_endpoint: str = ""
    # Passed as ``insecure`` to the same exporters
    grpc_insecure: bool = True


def instrument_otel(app: FastAPI) -> None:
    """Instrument the application to send OpenTelemetry data.

    Metrics, traces and logs are sent over gRPC to an OTEL collector.
    The collector can then redirect them to whatever is configured.
    Typically: Jaeger for traces, Prometheus for metrics, ElasticSearch for logs.

    Nothing is done unless ``OTELSettings().enabled`` is true.

    Note: this is highly experimental, and OpenTelemetry is a quickly moving target

    :param app: the FastAPI application to instrument
    """

    otel_settings = OTELSettings()
    if not otel_settings.enabled:
        return

    # Every exporter talks to the same collector with the same options
    exporter_kwargs = {
        "endpoint": otel_settings.grpc_endpoint,
        "insecure": otel_settings.grpc_insecure,
    }

    # set the service name to show in traces, metrics and logs
    resource = Resource.create(
        attributes={
            "service.name": otel_settings.application_name,
            "service.instance.id": os.uname().nodename,
        }
    )

    # Traces
    tracer_provider = TracerProvider(resource=resource)
    tracer_provider.add_span_processor(
        BatchSpanProcessor(OTLPSpanExporter(**exporter_kwargs))
    )
    trace.set_tracer_provider(tracer_provider)

    # Metrics
    metric_reader = PeriodicExportingMetricReader(
        OTLPMetricExporter(**exporter_kwargs),
        export_interval_millis=3000,
    )
    # Pass the resource so that metrics carry the same service name and
    # instance id as the traces and the logs
    meter_provider = MeterProvider(metric_readers=[metric_reader], resource=resource)
    metrics.set_meter_provider(meter_provider)

    # Logs
    # Inject the trace id and span id in the log records, without
    # overriding the logging format
    # https://github.com/mhausenblas/ref.otel.help/blob/main/how-to/logs-collection/yoda/main.py
    LoggingInstrumentor().instrument(set_logging_format=False)

    logger_provider = LoggerProvider(resource=resource)
    _logs.set_logger_provider(logger_provider)
    logger_provider.add_log_record_processor(
        BatchLogRecordProcessor(OTLPLogExporter(**exporter_kwargs))
    )

    handler = LoggingHandler(level=logging.DEBUG, logger_provider=logger_provider)
    handler.setFormatter(logging.Formatter(DEFAULT_LOGGING_FORMAT))
    # Add the handler to diracx and all uvicorn logger
    # Note adding it to just 'uvicorn' or the root logger
    # is not enough because uvicorn sets propagate=False
    # Iterate over a snapshot: getLogger() writes to loggerDict
    for logger_name in list(logging.root.manager.loggerDict):
        if "diracx" == logger_name or "uvicorn" in logger_name:
            logging.getLogger(logger_name).addHandler(handler)

    # HTTP server instrumentation
    FastAPIInstrumentor.instrument_app(
        app, tracer_provider=tracer_provider, meter_provider=meter_provider
    )
12 changes: 12 additions & 0 deletions docs/OPENTELEMETRY.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# OpenTelemetry

> :warning: **Experimental**: opentelemetry is an evolving product, and so is our implementation of it.
``diracx`` is capable of sending [OpenTelemetry](https://opentelemetry.io/) data to a collector. The settings are controlled by the
``diracx.routers.otel.OTELSettings`` class.

``diracx`` will then export metrics, traces, and logs. For the moment, nothing is really instrumented, but the infrastructure is there.

![OTEL Logs](./otel-logs.png)
![OTEL Metrics](./otel-metrics.png)
![OTEL Traces](./otel-traces.png)
Binary file added docs/otel-logs.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/otel-metrics.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/otel-traces.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit a0b0af6

Please sign in to comment.