From a0304a039e52cdb6494d5767b52261bb50463954 Mon Sep 17 00:00:00 2001 From: Carey Metcalfe Date: Mon, 25 Nov 2024 12:07:59 -0500 Subject: [PATCH] Use `LogRecord.getMessage` to get OTLP body (#4216) --- CHANGELOG.md | 3 + .../sdk/_logs/_internal/__init__.py | 56 ++++++------------- opentelemetry-sdk/tests/logs/test_export.py | 46 +++++++++++++++ 3 files changed, 65 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf0ae6a891..b8fb2bd3f4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#4224](https://github.com/open-telemetry/opentelemetry-python/pull/4224)) - Drop `OTEL_PYTHON_EXPERIMENTAL_DISABLE_PROMETHEUS_UNIT_NORMALIZATION` environment variable ([#4217](https://github.com/open-telemetry/opentelemetry-python/pull/4217)) +- Improve compatibility with other logging libraries that override + `LogRecord.getMessage()` in order to customize message formatting + ([#4216](https://github.com/open-telemetry/opentelemetry-python/pull/4216)) ## Version 1.27.0/0.48b0 (2024-08-28) diff --git a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py index 2d52b1bc74..110bb89c99 100644 --- a/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py +++ b/opentelemetry-sdk/src/opentelemetry/sdk/_logs/_internal/__init__.py @@ -438,6 +438,7 @@ def force_flush(self, timeout_millis: int = 30000) -> bool: "exc_text", "filename", "funcName", + "getMessage", "message", "levelname", "levelno", @@ -503,51 +504,26 @@ def _translate(self, record: logging.LogRecord) -> LogRecord: observered_timestamp = time_ns() span_context = get_current_span().get_span_context() attributes = self._get_attributes(record) - # This comment is taken from GanyedeNil's PR #3343, I have redacted it - # slightly for clarity: - # According to the definition of the Body field type in the - # OTel 1.22.0 Logs Data Model article, the Body field should be of - # type 'any' and should not use the str method to directly translate - # the msg. This is because str only converts non-text types into a - # human-readable form, rather than a standard format, which leads to - # the need for additional operations when collected through a log - # collector. - # Considering that he Body field should be of type 'any' and should not - # use the str method but record.msg is also a string type, then the - # difference is just the self.args formatting? - # The primary consideration depends on the ultimate purpose of the log. - # Converting the default log directly into a string is acceptable as it - # will be required to be presented in a more readable format. However, - # this approach might not be as "standard" when hoping to aggregate - # logs and perform subsequent data analysis. In the context of log - # extraction, it would be more appropriate for the msg to be - # converted into JSON format or remain unchanged, as it will eventually - # be transformed into JSON. If the final output JSON data contains a - # structure that appears similar to JSON but is not, it may confuse - # users. This is particularly true for operation and maintenance - # personnel who need to deal with log data in various languages. - # Where is the JSON converting occur? and what about when the msg - # represents something else but JSON, the expected behavior change? - # For the ConsoleLogExporter, it performs the to_json operation in - # opentelemetry.sdk._logs._internal.export.ConsoleLogExporter.__init__, - # so it can handle any type of input without problems. As for the - # OTLPLogExporter, it also handles any type of input encoding in - # _encode_log located in - # opentelemetry.exporter.otlp.proto.common._internal._log_encoder. - # Therefore, no extra operation is needed to support this change. - # The only thing to consider is the users who have already been using - # this SDK. If they upgrade the SDK after this change, they will need - # to readjust their logging collection rules to adapt to the latest - # output format. Therefore, this change is considered a breaking - # change and needs to be upgraded at an appropriate time. severity_number = std_to_otel(record.levelno) if self.formatter: body = self.format(record) else: - if isinstance(record.msg, str) and record.args: - body = record.msg % record.args - else: + # `record.getMessage()` uses `record.msg` as a template to format + # `record.args` into. There is a special case in `record.getMessage()` + # where it will only attempt formatting if args are provided, + # otherwise, it just stringifies `record.msg`. + # + # Since the OTLP body field has a type of 'any' and the logging module + # is sometimes used in such a way that objects incorrectly end up + # set as record.msg, in those cases we would like to bypass + # `record.getMessage()` completely and set the body to the object + # itself instead of its string representation. + # For more background, see: https://github.com/open-telemetry/opentelemetry-python/pull/4216 + if not record.args and not isinstance(record.msg, str): + # no args are provided so it's *mostly* safe to use the message template as the body body = record.msg + else: + body = record.getMessage() # related to https://github.com/open-telemetry/opentelemetry-python/issues/3548 # Severity Text = WARN as defined in https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/logs/data-model.md#displaying-severity. diff --git a/opentelemetry-sdk/tests/logs/test_export.py b/opentelemetry-sdk/tests/logs/test_export.py index ce31d3991f..2e00bad653 100644 --- a/opentelemetry-sdk/tests/logs/test_export.py +++ b/opentelemetry-sdk/tests/logs/test_export.py @@ -230,6 +230,52 @@ def test_simple_log_record_processor_different_msg_types(self): item.instrumentation_scope.name, "different_msg_types" ) + def test_simple_log_record_processor_custom_single_obj(self): + """ + Tests that special-case handling for logging a single non-string object + is correctly applied. + """ + exporter = InMemoryLogExporter() + log_record_processor = BatchLogRecordProcessor(exporter) + + provider = LoggerProvider() + provider.add_log_record_processor(log_record_processor) + + logger = logging.getLogger("single_obj") + logger.addHandler(LoggingHandler(logger_provider=provider)) + + # NOTE: the behaviour of `record.getMessage` is detailed in the + # `logging.Logger.debug` documentation: + # > The msg is the message format string, and the args are the arguments + # > which are merged into msg using the string formatting operator. [...] + # > No % formatting operation is performed on msg when no args are supplied. + + # This test uses the presence of '%s' in the first arg to determine if + # formatting was applied + + # string msg with no args - getMessage bypasses formatting and sets the string directly + logger.warning("a string with a percent-s: %s") + # string msg with args - getMessage formats args into the msg + logger.warning("a string with a percent-s: %s", "and arg") + # non-string msg with args - getMessage stringifies msg and formats args into it + logger.warning(["a non-string with a percent-s", "%s"], "and arg") + # non-string msg with no args: + # - normally getMessage would stringify the object and bypass formatting + # - SPECIAL CASE: bypass stringification as well to keep the raw object + logger.warning(["a non-string with a percent-s", "%s"]) + log_record_processor.shutdown() + + finished_logs = exporter.get_finished_logs() + expected = [ + ("a string with a percent-s: %s"), + ("a string with a percent-s: and arg"), + ("['a non-string with a percent-s', 'and arg']"), + (["a non-string with a percent-s", "%s"]), + ] + for emitted, expected in zip(finished_logs, expected): + self.assertEqual(emitted.log_record.body, expected) + self.assertEqual(emitted.instrumentation_scope.name, "single_obj") + def test_simple_log_record_processor_different_msg_types_with_formatter( self, ):