feat: support extra parameter in Logger messages #257

Merged
143 changes: 108 additions & 35 deletions aws_lambda_powertools/logging/formatter.py
```diff
@@ -1,6 +1,31 @@
 import json
 import logging
 import os
+from typing import Dict, Iterable, Optional, Union
+
+STD_LOGGING_KEYS = (
+    "name",
+    "msg",
+    "args",
+    "levelname",
+    "levelno",
+    "pathname",
+    "filename",
+    "module",
+    "exc_info",
+    "exc_text",
+    "stack_info",
+    "lineno",
+    "funcName",
+    "created",
+    "msecs",
+    "relativeCreated",
+    "thread",
+    "threadName",
+    "processName",
+    "process",
+    "asctime",
+)
 
 
 class JsonFormatter(logging.Formatter):
@@ -30,12 +55,12 @@ def __init__(self, **kwargs):
         # Set the default unserializable function, by default values will be cast as str.
         self.default_json_formatter = kwargs.pop("json_default", str)
         # Set the insertion order for the log messages
-        self.format_dict = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
+        self.log_format = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
         self.reserved_keys = ["timestamp", "level", "location"]
         # Set the date format used by `asctime`
         super(JsonFormatter, self).__init__(datefmt=kwargs.pop("datefmt", None))
 
-        self.format_dict.update(self._build_root_keys(**kwargs))
+        self.log_format.update(self._build_root_keys(**kwargs))
 
     @staticmethod
     def _build_root_keys(**kwargs):
@@ -52,47 +77,95 @@ def _get_latest_trace_id():
         return xray_trace_id.split(";")[0].replace("Root=", "") if xray_trace_id else None
 
     def update_formatter(self, **kwargs):
-        self.format_dict.update(kwargs)
+        self.log_format.update(kwargs)
 
-    def format(self, record):  # noqa: A003
-        record_dict = record.__dict__.copy()
-        record_dict["asctime"] = self.formatTime(record, self.datefmt)
-
-        log_dict = {}
-
-        for key, value in self.format_dict.items():
-            if value and key in self.reserved_keys:
-                # converts default logging expr to its record value
-                # e.g. '%(asctime)s' to '2020-04-24 09:35:40,698'
-                log_dict[key] = value % record_dict
-            else:
-                log_dict[key] = value
-
-        if isinstance(record_dict["msg"], dict):
-            log_dict["message"] = record_dict["msg"]
-        else:
-            log_dict["message"] = record.getMessage()
-
-        # Attempt to decode the message as JSON, if so, merge it with the
-        # overall message for clarity.
-        try:
-            log_dict["message"] = json.loads(log_dict["message"])
-        except (json.decoder.JSONDecodeError, TypeError, ValueError):
-            pass
-
-        if record.exc_info and not record.exc_text:
-            # Cache the traceback text to avoid converting it multiple times
-            # (it's constant anyway)
-            # from logging.Formatter:format
-            record.exc_text = self.formatException(record.exc_info)
-
-        if record.exc_text:
-            log_dict["exception"] = record.exc_text
-
-        # fetch latest X-Ray Trace ID, if any
-        log_dict.update({"xray_trace_id": self._get_latest_trace_id()})
+    @staticmethod
+    def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]:
+        """Extract message from log record and attempt to JSON decode it
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract message from
+
+        Returns
+        -------
+        message: Union[Dict, str, bool, Iterable]
+            Extracted message
+        """
+        if isinstance(log_record.msg, dict):
+            return log_record.msg
+
+        message: str = log_record.getMessage()
+
+        # Attempt to decode non-str messages e.g. msg = '{"x": "y"}'
+        try:
+            message = json.loads(log_record.msg)
+        except (json.decoder.JSONDecodeError, TypeError, ValueError):
+            pass
+
+        return message
+
+    def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]:
+        """Format traceback information, if available
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract message from
+
+        Returns
+        -------
+        log_record: Optional[str]
+            Log record with constant traceback info
+        """
+        if log_record.exc_info:
+            return self.formatException(log_record.exc_info)
+
+        return None
+
+    def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict:
+        """Extract and parse custom and reserved log keys
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract keys from
+
+        Returns
+        -------
+        formatted_log: Dict
+            Structured log as dictionary
+        """
+        record_dict = log_record.__dict__.copy()  # has extra kwargs we are after
+        record_dict["asctime"] = self.formatTime(log_record, self.datefmt)
+
+        formatted_log = {}
+
+        # We have to iterate over a default or existing log structure
+        # then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO'
+        # and lastly add or replace incoming keys (those added within the constructor or .structure_logs method)
+        for key, value in self.log_format.items():
+            if value and key in self.reserved_keys:
+                formatted_log[key] = value % record_dict
+            else:
+                formatted_log[key] = value
+
+        # pick up extra keys when logging a new message e.g. log.info("my message", extra={"additional_key": "value"})
+        # these messages will be added to the root of the final structure not within `message` key
+        for key, value in record_dict.items():
+            if key not in STD_LOGGING_KEYS:
+                formatted_log[key] = value
+
+        return formatted_log
+
+    def format(self, record):  # noqa: A003
+        formatted_log = self._extract_log_keys(log_record=record)
+        formatted_log["message"] = self._extract_log_message(log_record=record)
+        formatted_log["exception"] = self._extract_log_exception(log_record=record)
+        formatted_log.update({"xray_trace_id": self._get_latest_trace_id()})  # fetch latest Trace ID, if any
 
         # Filter out top level key with values that are None
-        log_dict = {k: v for k, v in log_dict.items() if v is not None}
+        formatted_log = {k: v for k, v in formatted_log.items() if v is not None}
 
-        return json.dumps(log_dict, default=self.default_json_formatter)
+        return json.dumps(formatted_log, default=self.default_json_formatter)
```
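The mechanism the new `_extract_log_keys` relies on is plain standard-library behavior: `logging` merges the `extra` mapping into the `LogRecord`'s `__dict__`, so filtering out `STD_LOGGING_KEYS` (and `None` values) leaves exactly the caller's custom keys. A minimal stdlib-only sketch of that idea (the formatter name here is illustrative, not part of the PR):

```python
import json
import logging

# mirrors the reserved attribute names in the PR's STD_LOGGING_KEYS tuple
STD_LOGGING_KEYS = {
    "name", "msg", "args", "levelname", "levelno", "pathname", "filename",
    "module", "exc_info", "exc_text", "stack_info", "lineno", "funcName",
    "created", "msecs", "relativeCreated", "thread", "threadName",
    "processName", "process", "asctime",
}


class ExtraKeysFormatter(logging.Formatter):
    def format(self, record: logging.LogRecord) -> str:
        # `extra` kwargs were merged into record.__dict__ by the stdlib;
        # drop reserved attributes and None values, as the PR's formatter does
        custom = {k: v for k, v in record.__dict__.items() if k not in STD_LOGGING_KEYS and v is not None}
        custom["message"] = record.getMessage()
        return json.dumps(custom, default=str)


handler = logging.StreamHandler()
handler.setFormatter(ExtraKeysFormatter())
logger = logging.getLogger("demo")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info("Hello", extra={"request_id": "1123"})
# -> {"request_id": "1123", "message": "Hello"}
```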
93 changes: 84 additions & 9 deletions docs/content/core/logger.mdx

## Appending additional keys

You can append additional keys using either of these mechanisms:

* Persist new keys across all future log messages via `structure_logs`
* Add additional keys on a per log message basis via the `extra` parameter

### structure_logs

You can append your own keys to your existing Logger via `structure_logs` with **append** param.

```python:title=collect.py
# … example body collapsed in the diff view …
```

This example will add `order_id` if its value is not empty.

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
    "level": "INFO",
    "location": "collect.handler:1",
    "service": "payment",
    "sampling_rate": 0.0,
    "order_id": "order_id_value", // highlight-line
    "message": "Collecting payment"
}
```
</details>
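
The `collect.py` example itself is collapsed in the diff view. A minimal sketch consistent with the excerpt above, assuming the incoming event carries an `order_id` field:

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")


def handler(event, context):
    # persist order_id on this and all subsequent log messages
    logger.structure_logs(append=True, order_id=event.get("order_id"))
    logger.info("Collecting payment")
```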

### extra parameter

The `extra` parameter is available for all log levels, as implemented in the standard logging library. It accepts any dictionary, and its keys are added to the root structure of the logs.

```python:title=extra_parameter.py
logger = Logger(service="payment")

fields = { "request_id": "1123" }

logger.info("Hello", extra=fields) # highlight-line
```

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
"timestamp": "2021-01-12 14:08:12,357",
"level": "INFO",
"location": "collect.handler:1",
"service": "payment",
"sampling_rate": 0.0,
"request_id": "1123", // highlight-line
"message": "Collecting payment"
}
```
</details>
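
Unlike `structure_logs`, keys passed via `extra` apply only to that one record; a quick sketch of the difference:

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")

logger.info("Collecting payment", extra={"request_id": "1123"})  # request_id is present here
logger.info("Payment collected")                                 # ...but not in this record
```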

## Reusing Logger across your code

Logger supports inheritance via `child` parameter. This allows you to create multiple Loggers across your code base, and propagate changes such as new keys to all Loggers.
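
A minimal sketch of that pattern across two modules, assuming both use the same `service` value:

```python
# app.py
from aws_lambda_powertools import Logger

logger = Logger(service="payment")

# shared.py (same service name; child=True attaches it under the "payment" hierarchy)
from aws_lambda_powertools import Logger

logger = Logger(service="payment", child=True)
```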
*(intervening sections collapsed in the diff view)*

If you're migrating from other Loggers, there are a few key points to be aware of: **Service parameter**, **Inheriting Loggers**, **Overriding Log records**, and **Logging exceptions**.

### service parameter

Service defines what the function is responsible for, or is part of (e.g. payment service), and is also the name of the Logger.

For Logger, the `service` is the logging key customers can use to search log operations for one or more functions. For example, **search for all errors, or messages like X, where service is payment**.

### inheriting Loggers

> Python Logging hierarchy happens via the dot notation: `service`, `service.child`, `service.child_2`.
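
That hierarchy is plain standard-library behavior, for instance:

```python
import logging

parent = logging.getLogger("payment")
child = logging.getLogger("payment.child")

assert child.parent is parent  # records from "payment.child" propagate up to "payment"
```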

*(example collapsed in the diff view)*

In this case, Logger will register a Logger named `payment`, and a Logger named …

This can be fixed by either ensuring both have the `service` value as `payment`, or simply by using the environment variable `POWERTOOLS_SERVICE_NAME` to ensure the service value is the same across all Loggers when not explicitly set.
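
For instance, with `POWERTOOLS_SERVICE_NAME=payment` set on the function, neither Logger needs an explicit `service` value (a sketch):

```python
from aws_lambda_powertools import Logger

# POWERTOOLS_SERVICE_NAME=payment is assumed to be set in the environment
logger = Logger()
child_logger = Logger(child=True)
```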

### overriding Log records

You might want to continue to use the same date formatting style, or override `location` to display the `package.function_name:line_number` as you previously had.
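
A sketch of such overrides, assuming `Logger` forwards these keyword arguments to the JSON formatter (as the formatter's constructor in the diff above suggests):

```python
from aws_lambda_powertools import Logger

# assumptions: datefmt and location are forwarded to JsonFormatter,
# where location is one of the reserved '%(...)s' format expressions
logger = Logger(service="payment", datefmt="%m/%d/%Y %I:%M:%S %p", location="[%(funcName)s] %(module)s")
```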

*(example collapsed in the diff view; it configures `logger = Logger(stream=stdout, log_record_order=["level","location","message","timestamp"])`)*

Some keys cannot be suppressed in the Log records: `sampling_rate` is part of the specification and cannot be suppressed; `xray_trace_id` is suppressed automatically if X-Ray is not enabled in the Lambda function, and added automatically if it is.
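
Other root keys can be dropped by setting them to `None`, since the formatter in the diff above filters out any top-level key whose value is `None`. A hedged sketch:

```python
from aws_lambda_powertools import Logger

# assumption: a None value for a root key is removed by the formatter's None filter
logger = Logger(service="payment", location=None)  # "location" no longer appears in records
```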

### logging exceptions

When logging exceptions, Logger will add a new key named `exception`, and will serialize the full traceback as a string.
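
For example, using the standard `logger.exception` method inside an `except` block:

```python
try:
    raise ValueError("something went wrong")
except ValueError:
    logger.exception("Received an exception")  # traceback is serialized under the "exception" key
```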

*(intervening content collapsed in the diff view)*

```python
def handler(event: Dict, context: LambdaContext) -> List:
    # … earlier lines collapsed in the diff view …
    return response.get("Buckets", [])
```

**What's the difference between `structure_logs` and `extra`?**

Keys added with `structure_logs` will persist across multiple log messages, while keys added via `extra` will only be available in a given log message operation.

**Example - Persisting payment_id not request_id**

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")
logger.structure_logs(append=True, payment_id="123456789")

try:
    booking_id = book_flight()
    logger.info("Flight booked successfully", extra={"booking_id": booking_id})
except BookingReservationError:
    ...

logger.info("goodbye")
```

Note that `payment_id` remains in both log messages while `booking_id` is only available in the first message.

```json
{
    "level": "INFO",
    "location": "<module>:5",
    "message": "Flight booked successfully",
    "timestamp": "2021-01-12 14:09:10,859",
    "service": "payment",
    "sampling_rate": 0.0,
    "payment_id": "123456789",
    "booking_id": "75edbad0-0857-4fc9-b547-6180e2f7959b"
}
{
    "level": "INFO",
    "location": "<module>:6",
    "message": "goodbye",
    "timestamp": "2021-01-12 14:09:10,860",
    "service": "payment",
    "sampling_rate": 0.0,
    "payment_id": "123456789"
}
```
19 changes: 19 additions & 0 deletions tests/functional/test_logger.py
```diff
@@ -381,3 +381,22 @@ def test_logger_do_not_log_twice_when_root_logger_is_setup(stdout, service_name):
     # since child's log records propagated to root logger should be rejected
     logs = list(stdout.getvalue().strip().split("\n"))
     assert len(logs) == 2
+
+
+def test_logger_extra_kwargs(stdout, service_name):
+    # GIVEN Logger is initialized
+    logger = Logger(service=service_name, stream=stdout)
+
+    # WHEN `request_id` is an extra field in a log message to the existing structured log
+    fields = {"request_id": "blah"}
+
+    logger.info("with extra fields", extra=fields)
+    logger.info("without extra fields")
+
+    extra_fields_log, no_extra_fields_log = capture_multiple_logging_statements_output(stdout)
+
+    # THEN first log should have request_id field in the root structure
+    assert "request_id" in extra_fields_log
+
+    # THEN second log should not have request_id in the root structure
+    assert "request_id" not in no_extra_fields_log
```