diff --git a/CHANGELOG.md b/CHANGELOG.md index 9cc7dd6..5e1ea14 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,14 +4,27 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [3.1.0.rc1](https://github.com/nhairs/python-json-logger/compare/v3.0.1...v3.1.0.rc1) - 2023-05-03 +## [3.1.0.rc2](https://github.com/nhairs/python-json-logger/compare/v3.0.1...v3.1.0.rc2) - 2023-05-03 This splits common funcitonality out to allow supporting other JSON encoders. Although this is a large refactor, backwards compatibility has been maintained. ### Added - `.core` - more details below. -- Orjson encoder support via `.orjson.OrjsonFormatter`. -- MsgSpec encoder support via `.msgspec.MsgspecFormatter`. +- `.defaults` module that provides many functions for handling unsupported types. +- Orjson encoder support via `.orjson.OrjsonFormatter` with the following additions: + - bytes are URL safe base64 encoded. + - Exceptions are "pretty printed" using the exception name and message e.g. `"ValueError: bad value passed"` + - Enum values use their value, Enum classes now return all values as a list. + - Tracebacks are supported + - Classes (aka types) are supported + - Will fall back on `__str__` if available, else `__repr__` if available, else will use `__could_not_encode__` +- MsgSpec encoder support via `.msgspec.MsgspecFormatter` with the following additions: + - Exceptions are "pretty printed" using the exception name and message e.g. `"ValueError: bad value passed"` + - Enum classes now return all values as a list. 
+ - Tracebacks are supported + - Classes (aka types) are supported + - Will fall back on `__str__` if available, else `__repr__` if available, else will use `__could_not_encode__` + - Note: msgspec only supports enum values of type `int` or `str` [jcrist/msgspec#680](https://github.com/jcrist/msgspec/issues/680) ### Changed - `.jsonlogger` has been moved to `.json` with core functionality moved to `.core`. @@ -21,6 +34,12 @@ This splits common funcitonality out to allow supporting other JSON encoders. Al - `style` can now support non-standard arguments by setting `validate` to `False` - `validate` allows non-standard `style` arguments or prevents calling `validate` on standard `style` arguments. - `default` is ignored. +- `.json.JsonEncoder` default encodings changed: + - bytes are URL safe base64 encoded. + - Exception formatting detected using `BaseException` instead of `Exception`. Now "pretty prints" the exception using the exception name and message e.g. `"ValueError: bad value passed"` + - Dataclasses are now supported + - Enum values now use their value, Enum classes now return all values as a list. + - Will fall back on `__str__` if available, else `__repr__` if available, else will use `__could_not_encode__` ### Deprecated - `.jsonlogger` is now `.json` diff --git a/pylintrc b/pylintrc index 79541d9..3db6c3e 100644 --- a/pylintrc +++ b/pylintrc @@ -75,8 +75,9 @@ disable=raw-checker-failed, # cases. Disable rules that can cause conflicts line-too-long, # Module docstrings are not required - missing-module-docstring + missing-module-docstring, ## Project Disables + duplicate-code # Enable the message, report, category or checker with the given id(s). 
You can # either give multiple identifier separated by comma (,) or put this option diff --git a/pyproject.toml b/pyproject.toml index 13492aa..8264289 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "python-json-logger" -version = "3.1.0.rc1" +version = "3.1.0.rc2" description = "JSON Log Formatter for the Python Logging Package" authors = [ {name = "Zakaria Zajac", email = "zak@madzak.com"}, @@ -55,6 +55,8 @@ dev = [ ## Test "pytest", "freezegun", + "backports.zoneinfo;python_version<'3.9'", + "tzdata", ## Build "build", ] diff --git a/src/pythonjsonlogger/defaults.py b/src/pythonjsonlogger/defaults.py new file mode 100644 index 0000000..542f9a8 --- /dev/null +++ b/src/pythonjsonlogger/defaults.py @@ -0,0 +1,146 @@ +# pylint: disable=missing-function-docstring + +### IMPORTS +### ============================================================================ +## Future +from __future__ import annotations + +## Standard Library +import base64 +import dataclasses +import datetime +import enum +import sys +from types import TracebackType +from typing import Any +import traceback +import uuid + +if sys.version_info >= (3, 10): + from typing import TypeGuard +else: + from typing_extensions import TypeGuard + +## Installed + +## Application + + +### FUNCTIONS +### ============================================================================ +def unknown_default(obj: Any) -> str: + try: + return str(obj) + except Exception: # pylint: disable=broad-exception-caught + pass + try: + return repr(obj) + except Exception: # pylint: disable=broad-exception-caught + pass + return "__could_not_encode__" + + +## Types +## ----------------------------------------------------------------------------- +def use_type_default(obj: Any) -> TypeGuard[type]: + return isinstance(obj, type) + + +def type_default(obj: type) -> str: + return obj.__name__ + + +## Dataclasses +## 
----------------------------------------------------------------------------- +def use_dataclass_default(obj: Any) -> bool: + return dataclasses.is_dataclass(obj) and not isinstance(obj, type) + + +def dataclass_default(obj) -> dict[str, Any]: + return dataclasses.asdict(obj) + + +## Dates and Times +## ----------------------------------------------------------------------------- +def use_time_default(obj: Any) -> TypeGuard[datetime.time]: + return isinstance(obj, datetime.time) + + +def time_default(obj: datetime.time) -> str: + return obj.isoformat() + + +def use_date_default(obj: Any) -> TypeGuard[datetime.date]: + return isinstance(obj, datetime.date) + + +def date_default(obj: datetime.date) -> str: + return obj.isoformat() + + +def use_datetime_default(obj: Any) -> TypeGuard[datetime.datetime]: + return isinstance(obj, datetime.datetime) + + +def datetime_default(obj: datetime.datetime) -> str: + return obj.isoformat() + + +def use_datetime_any(obj: Any) -> TypeGuard[datetime.time | datetime.date | datetime.datetime]: + return isinstance(obj, (datetime.time, datetime.date, datetime.datetime)) + + +def datetime_any(obj: datetime.time | datetime.date | datetime.date) -> str: + return obj.isoformat() + + +## Exception and Tracebacks +## ----------------------------------------------------------------------------- +def use_exception_default(obj: Any) -> TypeGuard[BaseException]: + return isinstance(obj, BaseException) + + +def exception_default(obj: BaseException) -> str: + return f"{obj.__class__.__name__}: {obj}" + + +def use_traceback_default(obj: Any) -> TypeGuard[TracebackType]: + return isinstance(obj, TracebackType) + + +def traceback_default(obj: TracebackType) -> str: + return "".join(traceback.format_tb(obj)).strip() + + +## Enums +## ----------------------------------------------------------------------------- +def use_enum_default(obj: Any) -> TypeGuard[enum.Enum | enum.EnumMeta]: + return isinstance(obj, (enum.Enum, enum.EnumMeta)) + + +def 
enum_default(obj: enum.Enum | enum.EnumMeta) -> Any | list[Any]: + if isinstance(obj, enum.Enum): + return obj.value + return [e.value for e in obj] # type: ignore[var-annotated] + + +## UUIDs +## ----------------------------------------------------------------------------- +def use_uuid_default(obj: Any) -> TypeGuard[uuid.UUID]: + return isinstance(obj, uuid.UUID) + + +def uuid_default(obj: uuid.UUID) -> str: + return str(obj) + + +## Bytes +## ----------------------------------------------------------------------------- +def use_bytes_default(obj: Any) -> TypeGuard[bytes | bytearray]: + return isinstance(obj, (bytes, bytearray)) + + +def bytes_default(obj: bytes | bytearray, url_safe: bool = True) -> str: + if url_safe: + return base64.urlsafe_b64encode(obj).decode("utf8") + return base64.b64encode(obj).decode("utf8") diff --git a/src/pythonjsonlogger/json.py b/src/pythonjsonlogger/json.py index 9d14a1d..9f6df21 100644 --- a/src/pythonjsonlogger/json.py +++ b/src/pythonjsonlogger/json.py @@ -10,15 +10,14 @@ from __future__ import annotations ## Standard Library -from datetime import date, datetime, time -from inspect import istraceback +import datetime import json -import traceback from typing import Any, Callable, Optional, Union import warnings ## Application from . import core +from . 
import defaults as d ### CLASSES @@ -31,33 +30,39 @@ class JsonEncoder(json.JSONEncoder): """ def default(self, o: Any) -> Any: - if isinstance(o, (date, datetime, time)): + if d.use_datetime_any(o): return self.format_datetime_obj(o) - if istraceback(o): - return "".join(traceback.format_tb(o)).strip() + if d.use_exception_default(o): + return d.exception_default(o) - # pylint: disable=unidiomatic-typecheck - if type(o) == Exception or isinstance(o, Exception) or type(o) == type: - return str(o) + if d.use_traceback_default(o): + return d.traceback_default(o) + + if d.use_enum_default(o): + return d.enum_default(o) + + if d.use_bytes_default(o): + return d.bytes_default(o) + + if d.use_dataclass_default(o): + return d.dataclass_default(o) + + if d.use_type_default(o): + return d.type_default(o) try: return super().default(o) - except TypeError: - try: - return str(o) - - except Exception: # pylint: disable=broad-exception-caught - return None + return d.unknown_default(o) - def format_datetime_obj(self, o): + def format_datetime_obj(self, o: datetime.time | datetime.date | datetime.datetime) -> str: """Format datetime objects found in self.default This allows subclasses to change the datetime format without understanding the internals of the default method. """ - return o.isoformat() + return d.datetime_any(o) class JsonFormatter(core.BaseJsonFormatter): diff --git a/src/pythonjsonlogger/msgspec.py b/src/pythonjsonlogger/msgspec.py index e711224..de41200 100644 --- a/src/pythonjsonlogger/msgspec.py +++ b/src/pythonjsonlogger/msgspec.py @@ -4,12 +4,29 @@ from __future__ import annotations ## Standard Library +from typing import Any ## Installed import msgspec.json ## Application from . import core +from . 
import defaults as d + + +### FUNCTIONS +### ============================================================================ +def msgspec_default(obj: Any) -> Any: + """msgspec default encoder function for non-standard types""" + if d.use_exception_default(obj): + return d.exception_default(obj) + if d.use_traceback_default(obj): + return d.traceback_default(obj) + if d.use_enum_default(obj): + return d.enum_default(obj) + if d.use_type_default(obj): + return d.type_default(obj) + return d.unknown_default(obj) ### CLASSES @@ -25,7 +42,7 @@ class MsgspecFormatter(core.BaseJsonFormatter): def __init__( self, *args, - json_default: core.OptionalCallableOrStr = None, + json_default: core.OptionalCallableOrStr = msgspec_default, **kwargs, ) -> None: """ diff --git a/src/pythonjsonlogger/orjson.py b/src/pythonjsonlogger/orjson.py index 4c5dbab..ebae618 100644 --- a/src/pythonjsonlogger/orjson.py +++ b/src/pythonjsonlogger/orjson.py @@ -4,12 +4,31 @@ from __future__ import annotations ## Standard Library +from typing import Any ## Installed import orjson ## Application from . import core +from . 
import defaults as d + + +### FUNCTIONS +### ============================================================================ +def orjson_default(obj: Any) -> Any: + """orjson default encoder function for non-standard types""" + if d.use_exception_default(obj): + return d.exception_default(obj) + if d.use_traceback_default(obj): + return d.traceback_default(obj) + if d.use_bytes_default(obj): + return d.bytes_default(obj) + if d.use_enum_default(obj): + return d.enum_default(obj) + if d.use_type_default(obj): + return d.type_default(obj) + return d.unknown_default(obj) ### CLASSES @@ -25,7 +44,7 @@ class OrjsonFormatter(core.BaseJsonFormatter): def __init__( self, *args, - json_default: core.OptionalCallableOrStr = None, + json_default: core.OptionalCallableOrStr = orjson_default, json_indent: bool = False, **kwargs, ) -> None: diff --git a/tests/test_formatters.py b/tests/test_formatters.py index 6719f36..1a6702c 100644 --- a/tests/test_formatters.py +++ b/tests/test_formatters.py @@ -16,12 +16,18 @@ from typing import Any, Generator import uuid +if sys.version_info >= (3, 9): + import zoneinfo +else: + from backports import zoneinfo + ## Installed -from freezegun import freeze_time +import freezegun import pytest ## Application import pythonjsonlogger +import pythonjsonlogger.defaults from pythonjsonlogger.core import RESERVED_ATTRS, BaseJsonFormatter, merge_record_extra from pythonjsonlogger.json import JsonFormatter @@ -75,7 +81,7 @@ def env() -> Generator[LoggingEnvironment, None, None]: def get_traceback_from_exception_followed_by_log_call(env_: LoggingEnvironment) -> str: try: raise Exception("test") - except Exception: + except Exception as e: env_.logger.exception("hello") str_traceback = traceback.format_exc() # Formatter removes trailing new line @@ -90,6 +96,14 @@ def __init__(self, thing: int): return +class BrokenClass: + def __str__(self) -> str: + raise ValueError("hahah sucker") + + def __repr__(self) -> str: + return self.__str__() + + @dataclass 
class SomeDataclass: things: str @@ -109,7 +123,10 @@ class MultiEnum(enum.Enum): BOOL = False STR = "somestring" INT = 99 - BYTES = b"somebytes" + BYTES = b"some-bytes" + + +NO_TEST = object() # Sentinel ### TESTS @@ -361,18 +378,23 @@ def test_rename_reserved_attrs(env: LoggingEnvironment, class_: type[BaseJsonFor return -@freeze_time(datetime.datetime(2017, 7, 14, 2, 40)) +@freezegun.freeze_time(datetime.datetime(2017, 7, 14, 2, 40)) @pytest.mark.parametrize("class_", ALL_FORMATTERS) def test_default_encoder_with_timestamp(env: LoggingEnvironment, class_: type[BaseJsonFormatter]): - if pythonjsonlogger.ORJSON_AVAILABLE and class_ is OrjsonFormatter: + if (pythonjsonlogger.ORJSON_AVAILABLE and class_ is OrjsonFormatter) or ( + pythonjsonlogger.MSGSPEC_AVAILABLE and class_ is MsgspecFormatter + ): + # FakeDatetime not supported # https://github.com/ijl/orjson/issues/481 - pytest.xfail() - - if pythonjsonlogger.MSGSPEC_AVAILABLE and class_ is MsgspecFormatter: # https://github.com/jcrist/msgspec/issues/678 - pytest.xfail() + def json_default(obj: Any) -> Any: + if isinstance(obj, freezegun.api.FakeDate): + return obj.isoformat() + raise ValueError(f"Unexpected object: {obj!r}") - env.set_formatter(class_(timestamp=True)) + env.set_formatter(class_(timestamp=True, json_default=json_default)) # type: ignore[call-arg] + else: + env.set_formatter(class_(timestamp=True)) env.logger.info("Hello") log_json = env.load_json() @@ -383,72 +405,63 @@ def test_default_encoder_with_timestamp(env: LoggingEnvironment, class_: type[Ba @pytest.mark.parametrize("class_", ALL_FORMATTERS) @pytest.mark.parametrize( - ["obj", "type_"], + ["obj", "type_", "expected"], [ - ("somestring", str), - (1234, int), - (1234.5, float), - (False, bool), - (None, type(None)), - (b"somebytes", str), - (datetime.time(16, 45, 30, 100), str), - (datetime.date.today(), str), - (datetime.datetime.utcnow(), str), - (uuid.uuid4(), str), - (Exception, str), - (Exception("Foo occurred"), str), -
(BaseException, str), - (BaseException("BaseFoo occurred"), str), - (STATIC_TRACEBACK, str), - (SomeDataclass(things="le_things", stuff=99, junk=False), dict), - (SomeDataclass, str), - (SomeClass, str), - (SomeClass(1234), str), - (MultiEnum.NONE, type(None)), - (MultiEnum.BOOL, bool), - (MultiEnum.STR, str), - (MultiEnum.INT, int), - (MultiEnum.BYTES, str), - (MultiEnum, str), + ("somestring", str, "somestring"), + ("some unicode Привет", str, "some unicode Привет"), + (1234, int, 1234), + (1234.5, float, 1234.5), + (False, bool, False), + (None, type(None), None), + (b"some-bytes", str, "c29tZS1ieXRlcw=="), + (datetime.time(16, 45, 30, 100), str, "16:45:30.000100"), + (datetime.date(2024, 5, 5), str, "2024-05-05"), + (datetime.datetime(2024, 5, 5, 16, 45, 30, 100), str, "2024-05-05T16:45:30.000100"), + ( + datetime.datetime(2024, 5, 5, 16, 45, 30, 100, zoneinfo.ZoneInfo("Australia/Sydney")), + str, + "2024-05-05T16:45:30.000100+10:00", + ), + ( + uuid.UUID("urn:uuid:12345678-1234-5678-1234-567812345678"), + str, + "12345678-1234-5678-1234-567812345678", + ), + (Exception, str, "Exception"), + (Exception("Foo occurred"), str, "Exception: Foo occurred"), + (BaseException, str, "BaseException"), + (BaseException("BaseFoo occurred"), str, "BaseException: BaseFoo occurred"), + (STATIC_TRACEBACK, str, pythonjsonlogger.defaults.traceback_default(STATIC_TRACEBACK)), # type: ignore[arg-type] + ( + SomeDataclass(things="le_things", stuff=99, junk=False), + dict, + {"things": "le_things", "stuff": 99, "junk": False}, + ), + (SomeDataclass, str, "SomeDataclass"), + (SomeClass, str, "SomeClass"), + (SomeClass(1234), str, NO_TEST), + (BrokenClass(), str, "__could_not_encode__"), + (MultiEnum.NONE, type(None), None), + (MultiEnum.BOOL, bool, MultiEnum.BOOL.value), + (MultiEnum.STR, str, MultiEnum.STR.value), + (MultiEnum.INT, int, MultiEnum.INT.value), + (MultiEnum.BYTES, str, "c29tZS1ieXRlcw=="), + (MultiEnum, list, [None, False, "somestring", 99, "c29tZS1ieXRlcw=="]), ], ) 
def test_common_types_encoded( - env: LoggingEnvironment, class_: type[BaseJsonFormatter], obj: object, type_: type + env: LoggingEnvironment, + class_: type[BaseJsonFormatter], + obj: object, + type_: type, + expected: Any, ): ## Known bad cases - if class_ is JsonFormatter: - if obj is SomeDataclass or isinstance(obj, SomeDataclass) or isinstance(obj, enum.Enum): - pytest.xfail() - - if pythonjsonlogger.ORJSON_AVAILABLE and class_ is OrjsonFormatter: - if ( - obj is Exception - or obj is BaseException - or isinstance(obj, BaseException) - or obj is SomeDataclass - or obj is SomeClass - or isinstance(obj, SomeClass) - or isinstance(obj, bytes) - or isinstance(obj, TracebackType) - or isinstance(obj, enum.EnumMeta) - or obj is MultiEnum.BYTES - ): - pytest.xfail() - if pythonjsonlogger.MSGSPEC_AVAILABLE and class_ is MsgspecFormatter: - if ( - obj is Exception - or obj is BaseException - or isinstance(obj, BaseException) - or obj is SomeDataclass - or obj is SomeClass - or isinstance(obj, SomeClass) - or isinstance(obj, TracebackType) - or isinstance(obj, enum.EnumMeta) - or ( - isinstance(obj, enum.Enum) - and obj in {MultiEnum.BYTES, MultiEnum.NONE, MultiEnum.BOOL} - ) + # Dataclass: https://github.com/jcrist/msgspec/issues/681 + # Enum: https://github.com/jcrist/msgspec/issues/680 + if obj is SomeDataclass or ( + isinstance(obj, enum.Enum) and obj in {MultiEnum.BYTES, MultiEnum.NONE, MultiEnum.BOOL} ): pytest.xfail() @@ -465,6 +478,18 @@ def test_common_types_encoded( assert isinstance(log_json["extra"], type_) assert isinstance(log_json["extra_dict"]["item"], type_) assert isinstance(log_json["extra_list"][0], type_) + + if expected is NO_TEST: + return + + if expected is None or isinstance(expected, bool): + assert log_json["extra"] is expected + assert log_json["extra_dict"]["item"] is expected + assert log_json["extra_list"][0] is expected + else: + assert log_json["extra"] == expected + assert log_json["extra_dict"]["item"] == expected + assert 
log_json["extra_list"][0] == expected return @@ -485,25 +510,6 @@ def custom_default(obj): ## JsonFormatter Specific ## ----------------------------------------------------------------------------- -def test_json_default_encoder(env: LoggingEnvironment): - env.set_formatter(JsonFormatter()) - - msg = { - "adate": datetime.datetime(1999, 12, 31, 23, 59), - "otherdate": datetime.date(1789, 7, 14), - "otherdatetime": datetime.datetime(1789, 7, 14, 23, 59), - "otherdatetimeagain": datetime.datetime(1900, 1, 1), - } - env.logger.info(msg) - log_json = env.load_json() - - assert log_json["adate"] == "1999-12-31T23:59:00" - assert log_json["otherdate"] == "1789-07-14" - assert log_json["otherdatetime"] == "1789-07-14T23:59:00" - assert log_json["otherdatetimeagain"] == "1900-01-01T00:00:00" - return - - def test_json_ensure_ascii_true(env: LoggingEnvironment): env.set_formatter(JsonFormatter()) env.logger.info("Привет")