feat: support extra parameter in Logger messages #257

Merged
143 changes: 108 additions & 35 deletions aws_lambda_powertools/logging/formatter.py
```diff
@@ -1,6 +1,31 @@
 import json
 import logging
 import os
+from typing import Dict, Iterable, Optional, Union
+
+STD_LOGGING_KEYS = (
+    "name",
+    "msg",
+    "args",
+    "levelname",
+    "levelno",
+    "pathname",
+    "filename",
+    "module",
+    "exc_info",
+    "exc_text",
+    "stack_info",
+    "lineno",
+    "funcName",
+    "created",
+    "msecs",
+    "relativeCreated",
+    "thread",
+    "threadName",
+    "processName",
+    "process",
+    "asctime",
+)
 
 
 class JsonFormatter(logging.Formatter):
@@ -30,12 +55,12 @@ def __init__(self, **kwargs):
         # Set the default unserializable function, by default values will be cast as str.
         self.default_json_formatter = kwargs.pop("json_default", str)
         # Set the insertion order for the log messages
-        self.format_dict = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
+        self.log_format = dict.fromkeys(kwargs.pop("log_record_order", ["level", "location", "message", "timestamp"]))
         self.reserved_keys = ["timestamp", "level", "location"]
         # Set the date format used by `asctime`
         super(JsonFormatter, self).__init__(datefmt=kwargs.pop("datefmt", None))
 
-        self.format_dict.update(self._build_root_keys(**kwargs))
+        self.log_format.update(self._build_root_keys(**kwargs))
 
     @staticmethod
     def _build_root_keys(**kwargs):
@@ -52,47 +77,95 @@ def _get_latest_trace_id():
         return xray_trace_id.split(";")[0].replace("Root=", "") if xray_trace_id else None
 
     def update_formatter(self, **kwargs):
-        self.format_dict.update(kwargs)
+        self.log_format.update(kwargs)
 
-    def format(self, record):  # noqa: A003
-        record_dict = record.__dict__.copy()
-        record_dict["asctime"] = self.formatTime(record, self.datefmt)
-
-        log_dict = {}
-
-        for key, value in self.format_dict.items():
-            if value and key in self.reserved_keys:
-                # converts default logging expr to its record value
-                # e.g. '%(asctime)s' to '2020-04-24 09:35:40,698'
-                log_dict[key] = value % record_dict
-            else:
-                log_dict[key] = value
-
-        if isinstance(record_dict["msg"], dict):
-            log_dict["message"] = record_dict["msg"]
-        else:
-            log_dict["message"] = record.getMessage()
-
-        # Attempt to decode the message as JSON, if so, merge it with the
-        # overall message for clarity.
-        try:
-            log_dict["message"] = json.loads(log_dict["message"])
-        except (json.decoder.JSONDecodeError, TypeError, ValueError):
-            pass
-
-        if record.exc_info and not record.exc_text:
-            # Cache the traceback text to avoid converting it multiple times
-            # (it's constant anyway)
-            # from logging.Formatter:format
-            record.exc_text = self.formatException(record.exc_info)
-
-        if record.exc_text:
-            log_dict["exception"] = record.exc_text
-
-        # fetch latest X-Ray Trace ID, if any
-        log_dict.update({"xray_trace_id": self._get_latest_trace_id()})
+    @staticmethod
+    def _extract_log_message(log_record: logging.LogRecord) -> Union[Dict, str, bool, Iterable]:
+        """Extract message from log record and attempt to JSON decode it
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract message from
+
+        Returns
+        -------
+        message: Union[Dict, str, bool, Iterable]
+            Extracted message
+        """
+        if isinstance(log_record.msg, dict):
+            return log_record.msg
+
+        message: str = log_record.getMessage()
+
+        # Attempt to decode non-str messages e.g. msg = '{"x": "y"}'
+        try:
+            message = json.loads(log_record.msg)
+        except (json.decoder.JSONDecodeError, TypeError, ValueError):
+            pass
+
+        return message
+
+    def _extract_log_exception(self, log_record: logging.LogRecord) -> Optional[str]:
+        """Format traceback information, if available
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract message from
+
+        Returns
+        -------
+        log_record: Optional[str]
+            Log record with constant traceback info
+        """
+        if log_record.exc_info:
+            return self.formatException(log_record.exc_info)
+
+        return None
+
+    def _extract_log_keys(self, log_record: logging.LogRecord) -> Dict:
+        """Extract and parse custom and reserved log keys
+
+        Parameters
+        ----------
+        log_record : logging.LogRecord
+            Log record to extract keys from
+
+        Returns
+        -------
+        formatted_log: Dict
+            Structured log as dictionary
+        """
+        record_dict = log_record.__dict__.copy()  # has extra kwargs we are after
+        record_dict["asctime"] = self.formatTime(log_record, self.datefmt)
+
+        formatted_log = {}
+
+        # We have to iterate over a default or existing log structure
+        # then replace any logging expression for reserved keys e.g. '%(level)s' to 'INFO'
+        # and lastly add or replace incoming keys (those added within the constructor or .structure_logs method)
+        for key, value in self.log_format.items():
+            if value and key in self.reserved_keys:
+                formatted_log[key] = value % record_dict
+            else:
+                formatted_log[key] = value
+
+        # pick up extra keys when logging a new message e.g. log.info("my message", extra={"additional_key": "value"})
+        # these messages will be added to the root of the final structure not within `message` key
+        for key, value in record_dict.items():
+            if key not in STD_LOGGING_KEYS:
+                formatted_log[key] = value
+
+        return formatted_log
+
+    def format(self, record):  # noqa: A003
+        formatted_log = self._extract_log_keys(log_record=record)
+        formatted_log["message"] = self._extract_log_message(log_record=record)
+        formatted_log["exception"] = self._extract_log_exception(log_record=record)
+        formatted_log.update({"xray_trace_id": self._get_latest_trace_id()})  # fetch latest Trace ID, if any
 
         # Filter out top level key with values that are None
-        log_dict = {k: v for k, v in log_dict.items() if v is not None}
+        formatted_log = {k: v for k, v in formatted_log.items() if v is not None}
 
-        return json.dumps(log_dict, default=self.default_json_formatter)
+        return json.dumps(formatted_log, default=self.default_json_formatter)
```
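The mechanism the new `_extract_log_keys` relies on is plain standard-library behavior: `logging` merges the `extra` mapping into the `LogRecord`'s `__dict__`, so filtering out `STD_LOGGING_KEYS` (and `None` values) leaves exactly the caller's custom keys. A minimal stdlib-only sketch of that idea (the formatter name here is illustrative, not part of the PR):

```python
import json
import logging

# mirrors the reserved attribute names in the PR's STD_LOGGING_KEYS tuple
STD_LOGGING_KEYS = {
    "name", "msg", "args", "levelname", "levelno", "pathname", "filename",
    "module", "exc_info", "exc_text", "stack_info", "lineno", "funcName",
    "created", "msecs", "relativeCreated", "thread", "threadName",
    "processName", "process", "asctime",
}


class ExtraKeysFormatter(logging.Formatter):
    def format(self, record: logging.LogRecord) -> str:
        # `extra` kwargs were merged into record.__dict__ by the stdlib;
        # drop reserved attributes and None values, as the PR's formatter does
        custom = {k: v for k, v in record.__dict__.items() if k not in STD_LOGGING_KEYS and v is not None}
        custom["message"] = record.getMessage()
        return json.dumps(custom, default=str)


handler = logging.StreamHandler()
handler.setFormatter(ExtraKeysFormatter())
logger = logging.getLogger("demo")
logger.addHandler(handler)
logger.setLevel(logging.INFO)

logger.info("Hello", extra={"request_id": "1123"})
# -> {"request_id": "1123", "message": "Hello"}
```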
93 changes: 84 additions & 9 deletions docs/content/core/logger.mdx

## Appending additional keys

You can append additional keys using either of these mechanisms:

* Persist new keys across all future log messages via `structure_logs`
* Add additional keys on a per log message basis via the `extra` parameter

### structure_logs

You can append your own keys to your existing Logger via `structure_logs` with **append** param.

```python:title=collect.py
# … example body collapsed in the diff view …
```

This example will add `order_id` if its value is not empty.

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
    "level": "INFO",
    "location": "collect.handler:1",
    "service": "payment",
    "sampling_rate": 0.0,
    "order_id": "order_id_value", // highlight-line
    "message": "Collecting payment"
}
```
</details>
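
The `collect.py` example itself is collapsed in the diff view. A minimal sketch consistent with the excerpt above, assuming the incoming event carries an `order_id` field:

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")


def handler(event, context):
    # persist order_id on this and all subsequent log messages
    logger.structure_logs(append=True, order_id=event.get("order_id"))
    logger.info("Collecting payment")
```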

### extra parameter

The `extra` parameter is available for all log levels, as implemented in the standard logging library. It accepts any dictionary, and its keys are added to the root structure of the logs.

```python:title=extra_parameter.py
logger = Logger(service="payment")

fields = { "request_id": "1123" }

logger.info("Hello", extra=fields) # highlight-line
```

<details>
<summary><strong>Excerpt output in CloudWatch Logs</strong></summary>

```json:title=cloudwatch_logs.json
{
"timestamp": "2021-01-12 14:08:12,357",
"level": "INFO",
"location": "collect.handler:1",
"service": "payment",
"sampling_rate": 0.0,
"request_id": "1123", // highlight-line
"message": "Collecting payment"
}
```
</details>
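
Unlike `structure_logs`, keys passed via `extra` apply only to that one record; a quick sketch of the difference:

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")

logger.info("Collecting payment", extra={"request_id": "1123"})  # request_id is present here
logger.info("Payment collected")                                 # ...but not in this record
```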

## Reusing Logger across your code

Logger supports inheritance via `child` parameter. This allows you to create multiple Loggers across your code base, and propagate changes such as new keys to all Loggers.
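
A minimal sketch of that pattern across two modules, assuming both use the same `service` value:

```python
# app.py
from aws_lambda_powertools import Logger

logger = Logger(service="payment")

# shared.py (same service name; child=True attaches it under the "payment" hierarchy)
from aws_lambda_powertools import Logger

logger = Logger(service="payment", child=True)
```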
*(intervening sections collapsed in the diff view)*

If you're migrating from other Loggers, there are a few key points to be aware of: **Service parameter**, **Inheriting Loggers**, **Overriding Log records**, and **Logging exceptions**.

### service parameter

Service defines what the function is responsible for, or is part of (e.g. payment service), and is also the name of the Logger.

For Logger, the `service` is the logging key customers can use to search log operations for one or more functions. For example, **search for all errors, or messages like X, where service is payment**.

### inheriting Loggers

> Python Logging hierarchy happens via the dot notation: `service`, `service.child`, `service.child_2`.
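
That hierarchy is plain standard-library behavior, for instance:

```python
import logging

parent = logging.getLogger("payment")
child = logging.getLogger("payment.child")

assert child.parent is parent  # records from "payment.child" propagate up to "payment"
```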

*(example collapsed in the diff view)*

In this case, Logger will register a Logger named `payment`, and a Logger named …

This can be fixed by either ensuring both have the `service` value as `payment`, or simply by using the environment variable `POWERTOOLS_SERVICE_NAME` to ensure the service value is the same across all Loggers when not explicitly set.
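
For instance, with `POWERTOOLS_SERVICE_NAME=payment` set on the function, neither Logger needs an explicit `service` value (a sketch):

```python
from aws_lambda_powertools import Logger

# POWERTOOLS_SERVICE_NAME=payment is assumed to be set in the environment
logger = Logger()
child_logger = Logger(child=True)
```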

### overriding Log records

You might want to continue to use the same date formatting style, or override `location` to display the `package.function_name:line_number` as you previously had.
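
A sketch of such overrides, assuming `Logger` forwards these keyword arguments to the JSON formatter (as the formatter's constructor in the diff above suggests):

```python
from aws_lambda_powertools import Logger

# assumptions: datefmt and location are forwarded to JsonFormatter,
# where location is one of the reserved '%(...)s' format expressions
logger = Logger(service="payment", datefmt="%m/%d/%Y %I:%M:%S %p", location="[%(funcName)s] %(module)s")
```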

*(example collapsed in the diff view; it configures `logger = Logger(stream=stdout, log_record_order=["level","location","message","timestamp"])`)*

Some keys cannot be suppressed in the Log records: `sampling_rate` is part of the specification and cannot be suppressed; `xray_trace_id` is suppressed automatically if X-Ray is not enabled in the Lambda function, and added automatically if it is.
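
Other root keys can be dropped by setting them to `None`, since the formatter in the diff above filters out any top-level key whose value is `None`. A hedged sketch:

```python
from aws_lambda_powertools import Logger

# assumption: a None value for a root key is removed by the formatter's None filter
logger = Logger(service="payment", location=None)  # "location" no longer appears in records
```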

### logging exceptions

When logging exceptions, Logger will add a new key named `exception`, and will serialize the full traceback as a string.
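
For example, using the standard `logger.exception` method inside an `except` block:

```python
try:
    raise ValueError("something went wrong")
except ValueError:
    logger.exception("Received an exception")  # traceback is serialized under the "exception" key
```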

*(intervening content collapsed in the diff view)*

```python
def handler(event: Dict, context: LambdaContext) -> List:
    # … earlier lines collapsed in the diff view …
    return response.get("Buckets", [])
```

**What's the difference between `structure_logs` and `extra`?**

Keys added with `structure_logs` will persist across multiple log messages, while keys added via `extra` will only be available in a given log message operation.

**Example - Persisting payment_id not request_id**

```python
from aws_lambda_powertools import Logger

logger = Logger(service="payment")
logger.structure_logs(append=True, payment_id="123456789")

try:
    booking_id = book_flight()
    logger.info("Flight booked successfully", extra={"booking_id": booking_id})
except BookingReservationError:
    ...

logger.info("goodbye")
```

Note that `payment_id` remains in both log messages while `booking_id` is only available in the first message.

```json
{
    "level": "INFO",
    "location": "<module>:5",
    "message": "Flight booked successfully",
    "timestamp": "2021-01-12 14:09:10,859",
    "service": "payment",
    "sampling_rate": 0.0,
    "payment_id": "123456789",
    "booking_id": "75edbad0-0857-4fc9-b547-6180e2f7959b"
}
{
    "level": "INFO",
    "location": "<module>:6",
    "message": "goodbye",
    "timestamp": "2021-01-12 14:09:10,860",
    "service": "payment",
    "sampling_rate": 0.0,
    "payment_id": "123456789"
}
```
19 changes: 19 additions & 0 deletions tests/functional/test_logger.py
```diff
@@ -381,3 +381,22 @@ def test_logger_do_not_log_twice_when_root_logger_is_setup(stdout, service_name):
     # since child's log records propagated to root logger should be rejected
     logs = list(stdout.getvalue().strip().split("\n"))
     assert len(logs) == 2
+
+
+def test_logger_extra_kwargs(stdout, service_name):
+    # GIVEN Logger is initialized
+    logger = Logger(service=service_name, stream=stdout)
+
+    # WHEN `request_id` is an extra field in a log message to the existing structured log
+    fields = {"request_id": "blah"}
+
+    logger.info("with extra fields", extra=fields)
+    logger.info("without extra fields")
+
+    extra_fields_log, no_extra_fields_log = capture_multiple_logging_statements_output(stdout)
+
+    # THEN first log should have request_id field in the root structure
+    assert "request_id" in extra_fields_log
+
+    # THEN second log should not have request_id in the root structure
+    assert "request_id" not in no_extra_fields_log
```