Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor(logger): BYOFormatter and Handler, UTC support, and more #404

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
082d1d0
chore: simplify handler conditional
heitorlessa Apr 14, 2021
4268e1c
refactor(formatter): reduce dict updates
heitorlessa Apr 14, 2021
cbce580
refactor(formatter): prep work for decoupling
heitorlessa Apr 15, 2021
5c8cf72
refactor: allow sampling_rate to be suppressed
heitorlessa Apr 15, 2021
8bf0ab4
refactor(formatter): merge exception formatting methods
heitorlessa Apr 15, 2021
b91ee96
refactor(formatter): support std log attr, explicit json default fn
heitorlessa Apr 16, 2021
87f5605
chore: rename test to new powertools formatter
heitorlessa Apr 16, 2021
209220f
feat(formatter): add UTC support
heitorlessa Apr 16, 2021
5a92347
chore: alias previous formatter
heitorlessa Apr 18, 2021
a4c9afa
refactor: move None strip to staticmethod, simplify extras
heitorlessa Apr 18, 2021
bf50133
refactor: remove unnecessary str coercion
heitorlessa Apr 18, 2021
46913ba
refactor: serializer/deserializer over encoder
heitorlessa Apr 18, 2021
754a2a2
fix: add missing reserved log attrs
heitorlessa Apr 18, 2021
456846a
refactor: add append/remove keys, common properties
heitorlessa Apr 18, 2021
2f828cd
refactor: update methods to use append_keys
heitorlessa Apr 19, 2021
903653f
feat: bring your own formatter
heitorlessa Apr 19, 2021
4345d8b
chore: move public methods up, remove unused test var
heitorlessa Apr 19, 2021
b328431
Update aws_lambda_powertools/logging/formatter.py
heitorlessa Apr 22, 2021
8488adc
feat: support custom logger handler
heitorlessa Apr 22, 2021
1a64e8b
Merge branch 'refactor/decouple-formatter' of https://github.com/heit…
heitorlessa Apr 22, 2021
a8c5126
chore: update docstrings to inform new parameters
heitorlessa Apr 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
223 changes: 138 additions & 85 deletions aws_lambda_powertools/logging/formatter.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
import json
import logging
import os
from typing import Dict, Iterable, Optional, Union
import time
from abc import ABCMeta, abstractmethod
from functools import partial
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union

from ..shared import constants

STD_LOGGING_KEYS = (
RESERVED_LOG_ATTRS = (
"name",
"msg",
"args",
"level",
"levelname",
"levelno",
"pathname",
Expand All @@ -27,63 +31,140 @@
"processName",
"process",
"asctime",
"location",
"timestamp",
)


class JsonFormatter(logging.Formatter):
"""AWS Lambda Logging formatter.
class BasePowertoolsFormatter(logging.Formatter, metaclass=ABCMeta):
@abstractmethod
def append_keys(self, **additional_keys):
raise NotImplementedError()

Formats the log message as a JSON encoded string. If the message is a
dict it will be used directly. If the message can be parsed as JSON, then
the parse d value is used in the output record.
@abstractmethod
def remove_keys(self, keys: Iterable[str]):
raise NotImplementedError()

Originally taken from https://gitlab.com/hadrien/aws_lambda_logging/

"""
class LambdaPowertoolsFormatter(BasePowertoolsFormatter):
"""AWS Lambda Powertools Logging formatter.

def __init__(self, **kwargs):
"""Return a JsonFormatter instance.
Formats the log message as a JSON encoded string. If the message is a
dict it will be used directly.
"""

The `json_default` kwarg is used to specify a formatter for otherwise
unserializable values. It must not throw. Defaults to a function that
coerces the value to a string.
default_time_format = "%Y-%m-%d %H:%M:%S.%F%z" # '2021-04-17 18:19:57.656+0200'
custom_ms_time_directive = "%F"

def __init__(
self,
json_serializer: Optional[Callable[[Any], Any]] = None,
json_deserializer: Optional[Callable[[Any], Any]] = None,
json_default: Optional[Callable[[Any], Any]] = None,
datefmt: str = None,
log_record_order: List[str] = None,
utc: bool = False,
**kwargs
):
"""Return a LambdaPowertoolsFormatter instance.

The `log_record_order` kwarg is used to specify the order of the keys used in
the structured json logs. By default the order is: "level", "location", "message", "timestamp",
"service" and "sampling_rate".

Other kwargs are used to specify log field format strings.

Parameters
----------
json_serializer : Callable, optional
A function to serialize `obj` to a JSON formatted `str`, by default json.dumps
json_deserializer : Callable, optional
A function to deserialize `str`, `bytes`, bytearray` containing a JSON document to a Python `obj`,
by default json.loads
json_default : Callable, optional
A function to coercer unserializable values, by default str

Only used when no custom JSON encoder is set

datefmt : str, optional
String directives (strftime) to format log timestamp

See https://docs.python.org/3/library/time.html#time.strftime
utc : bool, optional
Set logging timestamp to UTC, by default False to continue to use local time as per stdlib
kwargs
Key-value to be included in log messages

Examples
--------
Create examples

Add example of standard log attributes that use str interpolation e.g. %(process)d
Add example of JSON default fn for unserializable values
Add example for UTC
"""
# Set the default unserializable function, by default values will be cast as str.
self.default_json_formatter = kwargs.pop("json_default", str)
# Set the insertion order for the log messages
self.log_format = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
self.reserved_keys = ["timestamp", "level", "location"]
# Set the date format used by `asctime`
super(JsonFormatter, self).__init__(datefmt=kwargs.pop("datefmt", None))
self.json_deserializer = json_deserializer or json.loads
self.json_default = json_default or str
self.json_serializer = json_serializer or partial(json.dumps, default=self.json_default, separators=(",", ":"))
self.datefmt = datefmt
self.utc = utc
self.log_record_order = log_record_order or ["level", "location", "message", "timestamp"]
self.log_format = dict.fromkeys(self.log_record_order) # Set the insertion order for the log messages
self.update_formatter = self.append_keys # alias to previous method

if self.utc:
self.converter = time.gmtime

self.log_format.update(self._build_root_keys(**kwargs))
super(LambdaPowertoolsFormatter, self).__init__(datefmt=self.datefmt)

keys_combined = {**self._build_default_keys(), **kwargs}
self.log_format.update(**keys_combined)

def format(self, record: logging.LogRecord) -> str: # noqa: A003
"""Format logging record as structured JSON str"""
formatted_log = self._extract_log_keys(log_record=record)
formatted_log["message"] = self._extract_log_message(log_record=record)
formatted_log["exception"], formatted_log["exception_name"] = self._extract_log_exception(log_record=record)
formatted_log["xray_trace_id"] = self._get_latest_trace_id()
formatted_log = self._strip_none_records(records=formatted_log)

return self.json_serializer(formatted_log)

def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str:
# NOTE: Pyhton time.strftime doesn't provide msec directives
# so we create a custom one (%F) and replace logging record ts
# Reason 2 is that std logging doesn't support msec after TZ
record_ts = self.converter(record.created)
if datefmt:
ts_formatted = time.strftime(datefmt, record_ts)
else:
msec = "%03d" % record.msecs
custom_fmt = self.default_time_format.replace(self.custom_ms_time_directive, msec)
ts_formatted = time.strftime(custom_fmt, record_ts)
return ts_formatted
heitorlessa marked this conversation as resolved.
Show resolved Hide resolved

def append_keys(self, **additional_keys):
self.log_format.update(additional_keys)

def remove_keys(self, keys: Iterable[str]):
for key in keys:
self.log_format.pop(key, None)

@staticmethod
def _build_root_keys(**kwargs):
def _build_default_keys():
return {
"level": "%(levelname)s",
"location": "%(funcName)s:%(lineno)d",
"timestamp": "%(asctime)s",
**kwargs,
}

@staticmethod
def _get_latest_trace_id():
xray_trace_id = os.getenv(constants.XRAY_TRACE_ID_ENV)
return xray_trace_id.split(";")[0].replace("Root=", "") if xray_trace_id else None

def update_formatter(self, **kwargs):
self.log_format.update(kwargs)

@staticmethod
def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]:
"""Extract message from log record and attempt to JSON decode it
def _extract_log_message(self, log_record: logging.LogRecord) -> Union[Dict[str, Any], str, bool, Iterable]:
"""Extract message from log record and attempt to JSON decode it if str

Parameters
----------
Expand All @@ -95,20 +176,19 @@ def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool
message: Union[Dict, str, bool, Iterable]
Extracted message
"""
if isinstance(log_record.msg, dict):
return log_record.msg
message = log_record.msg
if isinstance(message, dict):
return message

message: str = log_record.getMessage()

# Attempt to decode non-str messages e.g. msg = '{"x": "y"}'
try:
message = json.loads(log_record.msg)
except (json.decoder.JSONDecodeError, TypeError, ValueError):
pass
if isinstance(message, str): # could be a JSON string
try:
message = self.json_deserializer(message)
except (json.decoder.JSONDecodeError, TypeError, ValueError):
pass

return message

def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]:
def _extract_log_exception(self, log_record: logging.LogRecord) -> Union[Tuple[str, str], Tuple[None, None]]:
"""Format traceback information, if available

Parameters
Expand All @@ -118,33 +198,15 @@ def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]

Returns
-------
log_record: Optional[str]
Log record with constant traceback info
log_record: Optional[Tuple[str, str]]
Log record with constant traceback info and exception name
"""
if log_record.exc_info:
return self.formatException(log_record.exc_info)

return None
return self.formatException(log_record.exc_info), log_record.exc_info[0].__name__

def _extract_log_exception_name(self, log_record: logging.LogRecord) -> Optional[str]:
"""Extract the exception name, if available
return None, None

Parameters
----------
log_record : logging.LogRecord
Log record to extract exception name from

Returns
-------
log_record: Optional[str]
Log record with exception name
"""
if log_record.exc_info:
return log_record.exc_info[0].__name__

return None

def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict:
def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict[str, Any]:
"""Extract and parse custom and reserved log keys

Parameters
Expand All @@ -157,36 +219,27 @@ def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict:
formatted_log: Dict
Structured log as dictionary
"""
record_dict = log_record.__dict__.copy() # has extra kwargs we are after
record_dict["asctime"] = self.formatTime(log_record, self.datefmt)
record_dict = log_record.__dict__.copy()
record_dict["asctime"] = self.formatTime(record=log_record, datefmt=self.datefmt)
extras = {k: v for k, v in record_dict.items() if k not in RESERVED_LOG_ATTRS}

formatted_log = {}
formatted_log = {**extras}

# We have to iterate over a default or existing log structure
# then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO'
# and lastly add or replace incoming keys (those added within the constructor or .structure_logs method)
# Iterate over a default or existing log structure
# then replace any std log attribute e.g. '%(level)s' to 'INFO', '%(process)d to '4773'
# lastly add or replace incoming keys (those added within the constructor or .structure_logs method)
for key, value in self.log_format.items():
if value and key in self.reserved_keys:
if value and key in RESERVED_LOG_ATTRS:
formatted_log[key] = value % record_dict
else:
formatted_log[key] = value

# pick up extra keys when logging a new message e.g. log.info("my message", extra={"additional_key": "value"}
# these messages will be added to the root of the final structure not within `message` key
for key, value in record_dict.items():
if key not in STD_LOGGING_KEYS:
formatted_log[key] = value

return formatted_log

def format(self, record): # noqa: A003
formatted_log = self._extract_log_keys(log_record=record)
formatted_log["message"] = self._extract_log_message(log_record=record)
formatted_log["exception_name"] = self._extract_log_exception_name(log_record=record)
formatted_log["exception"] = self._extract_log_exception(log_record=record)
formatted_log.update({"xray_trace_id": self._get_latest_trace_id()}) # fetch latest Trace ID, if any
@staticmethod
def _strip_none_records(records: Dict[str, Any]) -> Dict[str, Any]:
"""Remove any key with None as value"""
return {k: v for k, v in records.items() if v is not None}

# Filter out top level key with values that are None
formatted_log = {k: v for k, v in formatted_log.items() if v is not None}

return json.dumps(formatted_log, default=self.default_json_formatter)
JsonFormatter = LambdaPowertoolsFormatter # alias to previous formatter
Loading