From 5eced1cf4b83f200b2ed34876986284dd3fadb75 Mon Sep 17 00:00:00 2001 From: David Montague <35119617+dmontagu@users.noreply.github.com> Date: Fri, 20 Dec 2024 02:24:34 -0700 Subject: [PATCH] Use `OPEN_SPANS` and custom `record_exception` logic for all spans (#696) Co-authored-by: Alex Hall --- logfire-api/logfire_api/_internal/config.pyi | 6 +- logfire-api/logfire_api/_internal/main.pyi | 10 +-- logfire-api/logfire_api/_internal/tracer.pyi | 15 +++- logfire/_internal/backfill.py | 6 +- logfire/_internal/config.py | 9 +- logfire/_internal/main.py | 94 ++++---------------- logfire/_internal/tracer.py | 76 ++++++++++++++-- 7 files changed, 110 insertions(+), 106 deletions(-) diff --git a/logfire-api/logfire_api/_internal/config.pyi b/logfire-api/logfire_api/_internal/config.pyi index 26c03b6a0..f7fa80468 100644 --- a/logfire-api/logfire_api/_internal/config.pyi +++ b/logfire-api/logfire_api/_internal/config.pyi @@ -12,11 +12,11 @@ from .exporters.quiet_metrics import QuietMetricExporter as QuietMetricExporter from .exporters.remove_pending import RemovePendingSpansExporter as RemovePendingSpansExporter from .exporters.test import TestExporter as TestExporter from .integrations.executors import instrument_executors as instrument_executors -from .main import FastLogfireSpan as FastLogfireSpan, Logfire as Logfire, LogfireSpan as LogfireSpan +from .main import Logfire as Logfire from .metrics import ProxyMeterProvider as ProxyMeterProvider from .scrubbing import BaseScrubber as BaseScrubber, NOOP_SCRUBBER as NOOP_SCRUBBER, Scrubber as Scrubber, ScrubbingOptions as ScrubbingOptions from .stack_info import warn_at_user_stacklevel as warn_at_user_stacklevel -from .tracer import PendingSpanProcessor as PendingSpanProcessor, ProxyTracerProvider as ProxyTracerProvider +from .tracer import OPEN_SPANS as OPEN_SPANS, PendingSpanProcessor as PendingSpanProcessor, ProxyTracerProvider as ProxyTracerProvider from .utils import SeededRandomIdGenerator as SeededRandomIdGenerator, UnexpectedResponse as UnexpectedResponse, ensure_data_dir_exists as ensure_data_dir_exists, handle_internal_errors as handle_internal_errors, read_toml_file as read_toml_file, suppress_instrumentation as suppress_instrumentation from _typeshed import Incomplete from dataclasses import dataclass @@ -30,9 +30,7 @@ from opentelemetry.sdk.trace.id_generator import IdGenerator from pathlib import Path from typing import Any, Callable, Literal, Sequence, TypedDict from typing_extensions import Self, Unpack -from weakref import WeakSet -OPEN_SPANS: WeakSet[LogfireSpan | FastLogfireSpan] CREDENTIALS_FILENAME: str COMMON_REQUEST_HEADERS: Incomplete PROJECT_NAME_PATTERN: str diff --git a/logfire-api/logfire_api/_internal/main.pyi b/logfire-api/logfire_api/_internal/main.pyi index 208cc278b..7d54718e2 100644 --- a/logfire-api/logfire_api/_internal/main.pyi +++ b/logfire-api/logfire_api/_internal/main.pyi @@ -6,9 +6,9 @@ import requests from . import async_ as async_ from ..version import VERSION as VERSION from .auto_trace import AutoTraceModule as AutoTraceModule, install_auto_tracing as install_auto_tracing -from .config import GLOBAL_CONFIG as GLOBAL_CONFIG, LogfireConfig as LogfireConfig, OPEN_SPANS as OPEN_SPANS +from .config import GLOBAL_CONFIG as GLOBAL_CONFIG, LogfireConfig as LogfireConfig from .config_params import PydanticPluginRecordValues as PydanticPluginRecordValues -from .constants import ATTRIBUTES_JSON_SCHEMA_KEY as ATTRIBUTES_JSON_SCHEMA_KEY, ATTRIBUTES_LOG_LEVEL_NUM_KEY as ATTRIBUTES_LOG_LEVEL_NUM_KEY, ATTRIBUTES_MESSAGE_KEY as ATTRIBUTES_MESSAGE_KEY, ATTRIBUTES_MESSAGE_TEMPLATE_KEY as ATTRIBUTES_MESSAGE_TEMPLATE_KEY, ATTRIBUTES_SAMPLE_RATE_KEY as ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY as ATTRIBUTES_SPAN_TYPE_KEY, ATTRIBUTES_TAGS_KEY as ATTRIBUTES_TAGS_KEY, ATTRIBUTES_VALIDATION_ERROR_KEY as ATTRIBUTES_VALIDATION_ERROR_KEY, DISABLE_CONSOLE_KEY as DISABLE_CONSOLE_KEY, LEVEL_NUMBERS as LEVEL_NUMBERS, LevelName as LevelName, NULL_ARGS_KEY as NULL_ARGS_KEY, OTLP_MAX_INT_SIZE as OTLP_MAX_INT_SIZE, log_level_attributes as log_level_attributes +from .constants import ATTRIBUTES_JSON_SCHEMA_KEY as ATTRIBUTES_JSON_SCHEMA_KEY, ATTRIBUTES_LOG_LEVEL_NUM_KEY as ATTRIBUTES_LOG_LEVEL_NUM_KEY, ATTRIBUTES_MESSAGE_KEY as ATTRIBUTES_MESSAGE_KEY, ATTRIBUTES_MESSAGE_TEMPLATE_KEY as ATTRIBUTES_MESSAGE_TEMPLATE_KEY, ATTRIBUTES_SAMPLE_RATE_KEY as ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY as ATTRIBUTES_SPAN_TYPE_KEY, ATTRIBUTES_TAGS_KEY as ATTRIBUTES_TAGS_KEY, DISABLE_CONSOLE_KEY as DISABLE_CONSOLE_KEY, LEVEL_NUMBERS as LEVEL_NUMBERS, LevelName as LevelName, NULL_ARGS_KEY as NULL_ARGS_KEY, OTLP_MAX_INT_SIZE as OTLP_MAX_INT_SIZE, log_level_attributes as log_level_attributes from .formatter import logfire_format as logfire_format, logfire_format_with_magic as logfire_format_with_magic from .instrument import instrument as instrument from .integrations.asgi import ASGIApp as ASGIApp, ASGIInstrumentKwargs as ASGIInstrumentKwargs @@ -29,7 +29,7 @@ from .json_encoder import logfire_json_dumps as logfire_json_dumps from .json_schema import JsonSchemaProperties as JsonSchemaProperties, attributes_json_schema as attributes_json_schema, attributes_json_schema_properties as attributes_json_schema_properties, create_json_schema as create_json_schema from .metrics import ProxyMeterProvider as ProxyMeterProvider from .stack_info import get_user_stack_info as get_user_stack_info -from .tracer import ProxyTracerProvider as ProxyTracerProvider +from .tracer import ProxyTracerProvider as ProxyTracerProvider, record_exception as record_exception, set_exception_status as set_exception_status from .utils import SysExcInfo as SysExcInfo, get_version as get_version, handle_internal_errors as handle_internal_errors, log_internal_error as log_internal_error, uniquify_sequence as uniquify_sequence from django.http import HttpRequest as HttpRequest, HttpResponse as HttpResponse from fastapi import FastAPI @@ -1034,7 +1034,7 @@ class LogfireSpan(ReadableSpan): def message(self) -> str: ... @message.setter def message(self, message: str): ... - def end(self) -> None: + def end(self, end_time: int | None = None) -> None: """Sets the current time as the span's end time. The span's end time is the wall time at which the operation finished. @@ -1093,7 +1093,7 @@ class NoopSpan: def is_recording(self) -> bool: ... AttributesValueType = TypeVar('AttributesValueType', bound=Any | otel_types.AttributeValue) -def user_attributes(attributes: dict[str, Any]) -> dict[str, otel_types.AttributeValue]: +def prepare_otlp_attributes(attributes: dict[str, Any]) -> dict[str, otel_types.AttributeValue]: """Prepare attributes for sending to OpenTelemetry. This will convert any non-OpenTelemetry compatible types to JSON. diff --git a/logfire-api/logfire_api/_internal/tracer.pyi b/logfire-api/logfire_api/_internal/tracer.pyi index 4dedab6e3..bf8f7e75b 100644 --- a/logfire-api/logfire_api/_internal/tracer.pyi +++ b/logfire-api/logfire_api/_internal/tracer.pyi @@ -1,6 +1,7 @@ import opentelemetry.trace as trace_api from .config import LogfireConfig as LogfireConfig -from .constants import ATTRIBUTES_MESSAGE_KEY as ATTRIBUTES_MESSAGE_KEY, ATTRIBUTES_PENDING_SPAN_REAL_PARENT_KEY as ATTRIBUTES_PENDING_SPAN_REAL_PARENT_KEY, ATTRIBUTES_SAMPLE_RATE_KEY as ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY as ATTRIBUTES_SPAN_TYPE_KEY, PENDING_SPAN_NAME_SUFFIX as PENDING_SPAN_NAME_SUFFIX +from .constants import ATTRIBUTES_MESSAGE_KEY as ATTRIBUTES_MESSAGE_KEY, ATTRIBUTES_PENDING_SPAN_REAL_PARENT_KEY as ATTRIBUTES_PENDING_SPAN_REAL_PARENT_KEY, ATTRIBUTES_SAMPLE_RATE_KEY as ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY as ATTRIBUTES_SPAN_TYPE_KEY, ATTRIBUTES_VALIDATION_ERROR_KEY as ATTRIBUTES_VALIDATION_ERROR_KEY, PENDING_SPAN_NAME_SUFFIX as PENDING_SPAN_NAME_SUFFIX, log_level_attributes as log_level_attributes +from .utils import handle_internal_errors as handle_internal_errors from _typeshed import Incomplete from dataclasses import dataclass from opentelemetry import context as context_api @@ -13,7 +14,9 @@ from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util import types as otel_types from threading import Lock from typing import Any, Callable, Mapping, Sequence -from weakref import WeakKeyDictionary +from weakref import WeakKeyDictionary, WeakSet + +OPEN_SPANS: WeakSet[_LogfireWrappedSpan] @dataclass class ProxyTracerProvider(TracerProvider): @@ -32,11 +35,12 @@ class ProxyTracerProvider(TracerProvider): def resource(self) -> Resource: ... def force_flush(self, timeout_millis: int = 30000) -> bool: ... -@dataclass -class _MaybeDeterministicTimestampSpan(trace_api.Span, ReadableSpan): +@dataclass(eq=False) +class _LogfireWrappedSpan(trace_api.Span, ReadableSpan): """Span that overrides end() to use a timestamp generator if one was provided.""" span: Span ns_timestamp_generator: Callable[[], int] + def __post_init__(self) -> None: ... def end(self, end_time: int | None = None) -> None: ... def get_span_context(self) -> SpanContext: ... def set_attributes(self, attributes: dict[str, otel_types.AttributeValue]) -> None: ... @@ -85,3 +89,6 @@ def should_sample(span_context: SpanContext, attributes: Mapping[str, otel_types This is used to sample spans that are not sampled by the OTEL sampler. """ def get_sample_rate_from_attributes(attributes: otel_types.Attributes) -> float | None: ... +def record_exception(span: trace_api.Span, exception: BaseException, *, attributes: otel_types.Attributes = None, timestamp: int | None = None, escaped: bool = False) -> None: + """Similar to the OTEL SDK Span.record_exception method, with our own additions.""" +def set_exception_status(span: trace_api.Span, exception: BaseException): ... diff --git a/logfire/_internal/backfill.py b/logfire/_internal/backfill.py index 724d8009f..e4fd1189b 100644 --- a/logfire/_internal/backfill.py +++ b/logfire/_internal/backfill.py @@ -24,7 +24,7 @@ ) from .exporters.file import FileSpanExporter from .formatter import logfire_format -from .main import user_attributes +from .main import prepare_otlp_attributes from .scrubbing import Scrubber try: @@ -150,7 +150,7 @@ def write(self, data: Union[Log, Span]) -> None: ) else: parent_context = None # pragma: no cover - otlp_attributes = user_attributes(data.attributes) + otlp_attributes = prepare_otlp_attributes(data.attributes) if data.formatted_msg is None: # pragma: no cover formatted_message = logfire_format(data.msg_template, data.attributes, self.scrubber) @@ -196,7 +196,7 @@ def write(self, data: Union[Log, Span]) -> None: start_timestamp = data.start_timestamp if start_timestamp.tzinfo is None: # pragma: no branch start_timestamp = start_timestamp.replace(tzinfo=timezone.utc) - otlp_attributes = user_attributes(data.log_attributes) + otlp_attributes = prepare_otlp_attributes(data.log_attributes) if data.formatted_msg is None: # pragma: no branch formatted_message = logfire_format(data.msg_template, data.log_attributes, self.scrubber) else: # pragma: no cover diff --git a/logfire/_internal/config.py b/logfire/_internal/config.py index fbe70dd29..0fb86c8c6 100644 --- a/logfire/_internal/config.py +++ b/logfire/_internal/config.py @@ -16,7 +16,6 @@ from typing import TYPE_CHECKING, Any, Callable, Literal, Sequence, TypedDict, cast from urllib.parse import urljoin from uuid import uuid4 -from weakref import WeakSet import requests from opentelemetry import trace @@ -87,7 +86,7 @@ from .metrics import ProxyMeterProvider from .scrubbing import NOOP_SCRUBBER, BaseScrubber, Scrubber, ScrubbingOptions from .stack_info import warn_at_user_stacklevel -from .tracer import PendingSpanProcessor, ProxyTracerProvider +from .tracer import OPEN_SPANS, PendingSpanProcessor, ProxyTracerProvider from .utils import ( SeededRandomIdGenerator, UnexpectedResponse, @@ -98,10 +97,8 @@ ) if TYPE_CHECKING: - from .main import FastLogfireSpan, Logfire, LogfireSpan + from .main import Logfire -# NOTE: this WeakSet is the reason that FastLogfireSpan.__slots__ has a __weakref__ slot. -OPEN_SPANS: WeakSet[LogfireSpan | FastLogfireSpan] = WeakSet() CREDENTIALS_FILENAME = 'logfire_credentials.json' """Default base URL for the Logfire API.""" @@ -948,7 +945,7 @@ def _exit_open_spans(): # type: ignore[reportUnusedFunction] # pragma: no cove # Registering this callback here after the OTEL one means that this runs first. # Otherwise OTEL would log an error "Already shutdown, dropping span." for span in list(OPEN_SPANS): - span.__exit__(None, None, None) + span.end() self._initialized = True diff --git a/logfire/_internal/main.py b/logfire/_internal/main.py index c0da4ad93..f11b06bd5 100644 --- a/logfire/_internal/main.py +++ b/logfire/_internal/main.py @@ -4,7 +4,6 @@ import inspect import json import sys -import traceback import warnings from functools import cached_property from time import time @@ -26,15 +25,14 @@ import opentelemetry.trace as trace_api from opentelemetry.metrics import CallbackT, Counter, Histogram, UpDownCounter from opentelemetry.sdk.trace import ReadableSpan, Span -from opentelemetry.semconv.trace import SpanAttributes -from opentelemetry.trace import SpanContext, StatusCode, Tracer +from opentelemetry.trace import SpanContext, Tracer from opentelemetry.util import types as otel_types from typing_extensions import LiteralString, ParamSpec from ..version import VERSION from . import async_ from .auto_trace import AutoTraceModule, install_auto_tracing -from .config import GLOBAL_CONFIG, OPEN_SPANS, LogfireConfig +from .config import GLOBAL_CONFIG, LogfireConfig from .config_params import PydanticPluginRecordValues from .constants import ( ATTRIBUTES_JSON_SCHEMA_KEY, @@ -44,7 +42,6 @@ ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY, ATTRIBUTES_TAGS_KEY, - ATTRIBUTES_VALIDATION_ERROR_KEY, DISABLE_CONSOLE_KEY, LEVEL_NUMBERS, NULL_ARGS_KEY, @@ -63,7 +60,7 @@ ) from .metrics import ProxyMeterProvider from .stack_info import get_user_stack_info -from .tracer import ProxyTracerProvider +from .tracer import ProxyTracerProvider, record_exception, set_exception_status from .utils import get_version, handle_internal_errors, log_internal_error, uniquify_sequence if TYPE_CHECKING: @@ -107,12 +104,6 @@ ExcInfo = Union[SysExcInfo, BaseException, bool, None] -try: - from pydantic import ValidationError -except ImportError: # pragma: no cover - ValidationError = None - - class Logfire: """The main logfire class.""" @@ -194,7 +185,7 @@ def _span( merged_attributes[ATTRIBUTES_MESSAGE_TEMPLATE_KEY] = msg_template merged_attributes[ATTRIBUTES_MESSAGE_KEY] = log_message - otlp_attributes = user_attributes(merged_attributes) + otlp_attributes = prepare_otlp_attributes(merged_attributes) if json_schema_properties := attributes_json_schema_properties(attributes): otlp_attributes[ATTRIBUTES_JSON_SCHEMA_KEY] = attributes_json_schema(json_schema_properties) @@ -250,7 +241,7 @@ def _instrument_span_with_args( attributes[ATTRIBUTES_MESSAGE_KEY] = logfire_format(msg_template, function_args, self._config.scrubber) if json_schema_properties := attributes_json_schema_properties(function_args): # pragma: no branch attributes[ATTRIBUTES_JSON_SCHEMA_KEY] = attributes_json_schema(json_schema_properties) - attributes.update(user_attributes(function_args)) + attributes.update(prepare_otlp_attributes(function_args)) return self._fast_span(name, attributes) except Exception: # pragma: no cover log_internal_error() @@ -687,7 +678,7 @@ def log( msg = merged_attributes[ATTRIBUTES_MESSAGE_KEY] = str(msg) msg_template = str(msg_template) - otlp_attributes = user_attributes(merged_attributes) + otlp_attributes = prepare_otlp_attributes(merged_attributes) otlp_attributes = { ATTRIBUTES_SPAN_TYPE_KEY: 'log', **log_level_attributes(level), @@ -726,12 +717,12 @@ def log( if isinstance(exc_info, tuple): exc_info = exc_info[1] if isinstance(exc_info, BaseException): - _record_exception(span, exc_info) + record_exception(span, exc_info) if otlp_attributes[ATTRIBUTES_LOG_LEVEL_NUM_KEY] >= LEVEL_NUMBERS['error']: # type: ignore # Set the status description to the exception message. # OTEL only lets us set the description when the status code is ERROR, # which we only want to do when the log level is error. - _set_exception_status(span, exc_info) + set_exception_status(span, exc_info) elif exc_info is not None: # pragma: no cover raise TypeError(f'Invalid type for exc_info: {exc_info.__class__.__name__}') @@ -1931,20 +1922,17 @@ def shutdown(self, timeout_millis: int = 30_000, flush: bool = True) -> bool: # class FastLogfireSpan: """A simple version of `LogfireSpan` optimized for auto-tracing.""" - # __weakref__ is needed for the OPEN_SPANS WeakSet. - __slots__ = ('_span', '_token', '__weakref__') + __slots__ = ('_span', '_token') def __init__(self, span: trace_api.Span) -> None: self._span = span self._token = context_api.attach(trace_api.set_span_in_context(self._span)) - OPEN_SPANS.add(self) def __enter__(self) -> FastLogfireSpan: return self @handle_internal_errors() def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseException | None, traceback: Any) -> None: - OPEN_SPANS.remove(self) context_api.detach(self._token) _exit_span(self._span, exc_value) self._span.end() @@ -1986,8 +1974,6 @@ def __enter__(self) -> LogfireSpan: if self._token is None: # pragma: no branch self._token = context_api.attach(trace_api.set_span_in_context(self._span)) - OPEN_SPANS.add(self) - return self @handle_internal_errors() @@ -1995,8 +1981,6 @@ def __exit__(self, exc_type: type[BaseException] | None, exc_value: BaseExceptio if self._token is None: # pragma: no cover return - OPEN_SPANS.remove(self) - context_api.detach(self._token) self._token = None @@ -2029,7 +2013,7 @@ def message(self) -> str: def message(self, message: str): self._set_attribute(ATTRIBUTES_MESSAGE_KEY, message) - def end(self) -> None: + def end(self, end_time: int | None = None) -> None: """Sets the current time as the span's end time. The span's end time is the wall time at which the operation finished. @@ -2047,7 +2031,7 @@ def end(self) -> None: ATTRIBUTES_JSON_SCHEMA_KEY, attributes_json_schema(self._json_schema_properties) ) - self._span.end() + self._span.end(end_time) @handle_internal_errors() def set_attribute(self, key: str, value: Any) -> None: @@ -2093,7 +2077,7 @@ def record_exception( if not self._span.is_recording(): return - _record_exception( + record_exception( self._span, exception, attributes=attributes, @@ -2187,61 +2171,13 @@ def _exit_span(span: trace_api.Span, exception: BaseException | None) -> None: # record exception if present # isinstance is to ignore BaseException if isinstance(exception, Exception): - _record_exception(span, exception, escaped=True) - - -def _set_exception_status(span: trace_api.Span, exception: BaseException): - span.set_status( - trace_api.Status( - status_code=StatusCode.ERROR, - description=f'{exception.__class__.__name__}: {exception}', - ) - ) - - -@handle_internal_errors() -def _record_exception( - span: trace_api.Span, - exception: BaseException, - *, - attributes: otel_types.Attributes = None, - timestamp: int | None = None, - escaped: bool = False, -) -> None: - """Similar to the OTEL SDK Span.record_exception method, with our own additions.""" - # From https://opentelemetry.io/docs/specs/semconv/attributes-registry/exception/ - # `escaped=True` means that the exception is escaping the scope of the span. - # This means we know that the exception hasn't been handled, - # so we can set the OTEL status and the log level to error. - if escaped: - _set_exception_status(span, exception) - span.set_attributes(log_level_attributes('error')) - - attributes = {**(attributes or {})} - if ValidationError is not None and isinstance(exception, ValidationError): - # insert a more detailed breakdown of pydantic errors - try: - err_json = exception.json(include_url=False) - except TypeError: # pragma: no cover - # pydantic v1 - err_json = exception.json() - span.set_attribute(ATTRIBUTES_VALIDATION_ERROR_KEY, err_json) - attributes[ATTRIBUTES_VALIDATION_ERROR_KEY] = err_json - - if exception is not sys.exc_info()[1]: - # OTEL's record_exception uses `traceback.format_exc()` which is for the current exception, - # ignoring the passed exception. - # So we override the stacktrace attribute with the correct one. - stacktrace = ''.join(traceback.format_exception(type(exception), exception, exception.__traceback__)) - attributes[SpanAttributes.EXCEPTION_STACKTRACE] = stacktrace - - span.record_exception(exception, attributes=attributes, timestamp=timestamp, escaped=escaped) + record_exception(span, exception, escaped=True) AttributesValueType = TypeVar('AttributesValueType', bound=Union[Any, otel_types.AttributeValue]) -def user_attributes(attributes: dict[str, Any]) -> dict[str, otel_types.AttributeValue]: +def prepare_otlp_attributes(attributes: dict[str, Any]) -> dict[str, otel_types.AttributeValue]: """Prepare attributes for sending to OpenTelemetry. This will convert any non-OpenTelemetry compatible types to JSON. @@ -2288,7 +2224,7 @@ def set_user_attributes_on_raw_span(span: Span, attributes: dict[str, Any]) -> N if not span.is_recording(): return - otlp_attributes = user_attributes(attributes) + otlp_attributes = prepare_otlp_attributes(attributes) if json_schema_properties := attributes_json_schema_properties(attributes): # pragma: no branch existing_properties = JsonSchemaProperties({}) existing_json_schema_str = (span.attributes or {}).get(ATTRIBUTES_JSON_SCHEMA_KEY) diff --git a/logfire/_internal/tracer.py b/logfire/_internal/tracer.py index d4b86979f..5d7ad8410 100644 --- a/logfire/_internal/tracer.py +++ b/logfire/_internal/tracer.py @@ -1,9 +1,11 @@ from __future__ import annotations +import sys +import traceback from dataclasses import dataclass, field from threading import Lock from typing import TYPE_CHECKING, Any, Callable, Mapping, Sequence, cast -from weakref import WeakKeyDictionary +from weakref import WeakKeyDictionary, WeakSet import opentelemetry.trace as trace_api from opentelemetry import context as context_api @@ -17,6 +19,7 @@ ) from opentelemetry.sdk.trace.id_generator import IdGenerator from opentelemetry.semconv.resource import ResourceAttributes +from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.trace import Link, NonRecordingSpan, Span, SpanContext, SpanKind, Tracer, TracerProvider from opentelemetry.trace.propagation import get_current_span from opentelemetry.trace.status import Status, StatusCode @@ -27,12 +30,23 @@ ATTRIBUTES_PENDING_SPAN_REAL_PARENT_KEY, ATTRIBUTES_SAMPLE_RATE_KEY, ATTRIBUTES_SPAN_TYPE_KEY, + ATTRIBUTES_VALIDATION_ERROR_KEY, PENDING_SPAN_NAME_SUFFIX, + log_level_attributes, ) +from .utils import handle_internal_errors if TYPE_CHECKING: from .config import LogfireConfig +try: + from pydantic import ValidationError +except ImportError: # pragma: no cover + ValidationError = None + + +OPEN_SPANS: WeakSet[_LogfireWrappedSpan] = WeakSet() + @dataclass class ProxyTracerProvider(TracerProvider): @@ -100,14 +114,18 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: return True # pragma: no cover -@dataclass -class _MaybeDeterministicTimestampSpan(trace_api.Span, ReadableSpan): +@dataclass(eq=False) +class _LogfireWrappedSpan(trace_api.Span, ReadableSpan): """Span that overrides end() to use a timestamp generator if one was provided.""" span: Span ns_timestamp_generator: Callable[[], int] + def __post_init__(self): + OPEN_SPANS.add(self) + def end(self, end_time: int | None = None) -> None: + OPEN_SPANS.discard(self) self.span.end(end_time or self.ns_timestamp_generator()) def get_span_context(self) -> SpanContext: @@ -151,7 +169,7 @@ def record_exception( escaped: bool = False, ) -> None: timestamp = timestamp or self.ns_timestamp_generator() - return self.span.record_exception(exception, attributes, timestamp, escaped) + record_exception(self.span, exception, attributes=attributes, timestamp=timestamp, escaped=escaped) if not TYPE_CHECKING: # pragma: no branch # for ReadableSpan @@ -211,7 +229,7 @@ def start_span( ), ) ) - return _MaybeDeterministicTimestampSpan( + return _LogfireWrappedSpan( span, ns_timestamp_generator=self.provider.config.advanced.ns_timestamp_generator, ) @@ -314,3 +332,51 @@ def get_sample_rate_from_attributes(attributes: otel_types.Attributes) -> float if not attributes: # pragma: no cover return None return cast('float | None', attributes.get(ATTRIBUTES_SAMPLE_RATE_KEY)) + + +@handle_internal_errors() +def record_exception( + span: trace_api.Span, + exception: BaseException, + *, + attributes: otel_types.Attributes = None, + timestamp: int | None = None, + escaped: bool = False, +) -> None: + """Similar to the OTEL SDK Span.record_exception method, with our own additions.""" + # From https://opentelemetry.io/docs/specs/semconv/attributes-registry/exception/ + # `escaped=True` means that the exception is escaping the scope of the span. + # This means we know that the exception hasn't been handled, + # so we can set the OTEL status and the log level to error. + if escaped: + set_exception_status(span, exception) + span.set_attributes(log_level_attributes('error')) + + attributes = {**(attributes or {})} + if ValidationError is not None and isinstance(exception, ValidationError): + # insert a more detailed breakdown of pydantic errors + try: + err_json = exception.json(include_url=False) + except TypeError: # pragma: no cover + # pydantic v1 + err_json = exception.json() + span.set_attribute(ATTRIBUTES_VALIDATION_ERROR_KEY, err_json) + attributes[ATTRIBUTES_VALIDATION_ERROR_KEY] = err_json + + if exception is not sys.exc_info()[1]: + # OTEL's record_exception uses `traceback.format_exc()` which is for the current exception, + # ignoring the passed exception. + # So we override the stacktrace attribute with the correct one. + stacktrace = ''.join(traceback.format_exception(type(exception), exception, exception.__traceback__)) + attributes[SpanAttributes.EXCEPTION_STACKTRACE] = stacktrace + + span.record_exception(exception, attributes=attributes, timestamp=timestamp, escaped=escaped) + + +def set_exception_status(span: trace_api.Span, exception: BaseException): + span.set_status( + trace_api.Status( + status_code=StatusCode.ERROR, + description=f'{exception.__class__.__name__}: {exception}', + ) + )