Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core[patch]: add UsageMetadata details #27072

Merged
merged 10 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 74 additions & 7 deletions libs/core/langchain_core/messages/ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import Any, Literal, Optional, Union

from pydantic import model_validator
from typing_extensions import Self, TypedDict
from typing_extensions import NotRequired, Self, TypedDict

from langchain_core.messages.base import (
BaseMessage,
Expand All @@ -29,6 +29,62 @@
from langchain_core.utils.json import parse_partial_json


class InputTokenDetails(TypedDict, total=False):
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.

    Example:

        .. code-block:: python

            {
                "audio": 10,
                "cache_creation": 200,
                "cache_read": 100,
            }
    """

    audio: int
    """Audio input tokens."""
    cache_creation: int
    """Input tokens that were cached and there was a cache miss.

    Since there was a cache miss, the cache was created from these tokens.
    """
    cache_read: int
    """Input tokens that were cached and there was a cache hit.

    Since there was a cache hit, the tokens were read from the cache. More precisely,
    the model state given these tokens was read from the cache.
    """


class OutputTokenDetails(TypedDict, total=False):
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.

    Example:

        .. code-block:: python

            {
                "audio": 10,
                "reasoning": 200,
            }
    """

    audio: int
    """Audio output tokens."""
    reasoning: int
    """Reasoning output tokens.

    Tokens generated by the model in a chain of thought process (i.e. by OpenAI's o1
    models) that are not returned as part of model output.
    """


class UsageMetadata(TypedDict):
    """Usage metadata for a message, such as token counts.

    This is a standard representation of token usage that is consistent across models.

    Example:

        .. code-block:: python

            {
                "input_tokens": 350,
                "output_tokens": 240,
                "total_tokens": 590,
                "input_token_details": {
                    "audio": 10,
                    "cache_creation": 200,
                    "cache_read": 100,
                },
                "output_token_details": {
                    "audio": 10,
                    "reasoning": 200,
                }
            }
    """

    input_tokens: int
    """Count of input (or prompt) tokens. Sum of all input token types."""
    output_tokens: int
    """Count of output (or completion) tokens. Sum of all output token types."""
    total_tokens: int
    """Total token count. Sum of input_tokens + output_tokens."""
    input_token_details: NotRequired[InputTokenDetails]
    """Breakdown of input token counts.

    Does *not* need to sum to full input token count. Does *not* need to have all keys.
    """
    output_token_details: NotRequired[OutputTokenDetails]
    """Breakdown of output token counts.

    Does *not* need to sum to full output token count. Does *not* need to have all keys.
    """


class AIMessage(BaseMessage):
Expand Down
164 changes: 158 additions & 6 deletions libs/core/tests/unit_tests/prompts/__snapshots__/test_chat.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -677,6 +677,39 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
Expand Down Expand Up @@ -743,6 +776,34 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"reasoning": 200,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
Expand Down Expand Up @@ -1245,16 +1306,31 @@
.. code-block:: python

{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
Expand Down Expand Up @@ -2008,6 +2084,39 @@
'title': 'HumanMessageChunk',
'type': 'object',
}),
'InputTokenDetails': dict({
'description': '''
Breakdown of input token counts.

Does *not* need to sum to full input token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'cache_creation': dict({
'title': 'Cache Creation',
'type': 'integer',
}),
'cache_read': dict({
'title': 'Cache Read',
'type': 'integer',
}),
}),
'title': 'InputTokenDetails',
'type': 'object',
}),
'InvalidToolCall': dict({
'description': '''
Allowance for errors made by LLM.
Expand Down Expand Up @@ -2074,6 +2183,34 @@
'title': 'InvalidToolCall',
'type': 'object',
}),
'OutputTokenDetails': dict({
'description': '''
Breakdown of output token counts.

Does *not* need to sum to full output token count. Does *not* need to have all keys.

Example:

.. code-block:: python

{
"audio": 10,
"reasoning": 200,
}
''',
'properties': dict({
'audio': dict({
'title': 'Audio',
'type': 'integer',
}),
'reasoning': dict({
'title': 'Reasoning',
'type': 'integer',
}),
}),
'title': 'OutputTokenDetails',
'type': 'object',
}),
'SystemMessage': dict({
'additionalProperties': True,
'description': '''
Expand Down Expand Up @@ -2576,16 +2713,31 @@
.. code-block:: python

{
"input_tokens": 10,
"output_tokens": 20,
"total_tokens": 30
"input_tokens": 350,
"output_tokens": 240,
"total_tokens": 590,
"input_token_details": {
"audio": 10,
"cache_creation": 200,
"cache_read": 100,
},
"output_token_details": {
"audio": 10,
"reasoning": 200,
}
}
''',
'properties': dict({
'input_token_details': dict({
'$ref': '#/$defs/InputTokenDetails',
}),
'input_tokens': dict({
'title': 'Input Tokens',
'type': 'integer',
}),
'output_token_details': dict({
'$ref': '#/$defs/OutputTokenDetails',
}),
'output_tokens': dict({
'title': 'Output Tokens',
'type': 'integer',
Expand Down
Loading
Loading