langchain-ai · mattf · Dec 14, 2024 · Dec 13, 2024 · Dec 13, 2024 · Dec 13, 2024
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/_common.py
@@ -100,8 +100,8 @@ class _NVIDIAClient(BaseModel):
     last_inputs: Optional[dict] = Field(
         default={}, description="Last inputs sent over to the server"
     )
-    last_response: Response = Field(
-        None, description="Last response sent from the server"
+    last_response: Optional[Response] = Field(
+        Response(), description="Last response sent from the server"
     )
     headers_tmpl: dict = Field(
         {

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/chat_models.py
@@ -253,7 +253,7 @@ class ChatNVIDIA(BaseChatModel):
             response = model.invoke("Hello")
     """
 
-    _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
+    _client: _NVIDIAClient = PrivateAttr()
     base_url: Optional[str] = Field(
         default=None,
         description="Base url for model listing an invocation",

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -34,7 +34,7 @@ class NVIDIAEmbeddings(BaseModel, Embeddings):
         validate_assignment=True,
     )
 
-    _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
+    _client: _NVIDIAClient = PrivateAttr()
     base_url: Optional[str] = Field(
         default=None,
         description="Base url for model listing an invocation",

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/llm.py
@@ -23,7 +23,7 @@ class NVIDIA(LLM):
         validate_assignment=True,
     )
 
-    _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
+    _client: _NVIDIAClient = PrivateAttr()
     _default_model_name: str = "nvidia/mistral-nemo-minitron-8b-base"
     base_url: Optional[str] = Field(
         default=None,

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/reranking.py
@@ -34,7 +34,7 @@ class NVIDIARerank(BaseDocumentCompressor):
         validate_assignment=True,
     )
 
-    _client: _NVIDIAClient = PrivateAttr(_NVIDIAClient)
+    _client: _NVIDIAClient = PrivateAttr()
 
     base_url: Optional[str] = Field(
         default=None,

diff --git a/libs/ai-endpoints/poetry.lock b/libs/ai-endpoints/poetry.lock
diff --git a/libs/ai-endpoints/pyproject.toml b/libs/ai-endpoints/pyproject.toml
@@ -31,7 +31,7 @@ syrupy = "^4.0.2"
 pytest-watcher = "^0.3.4"
 pytest-asyncio = "^0.21.1"
 requests-mock = "^1.11.0"
-langchain-tests = "0.3.1"
+langchain-tests = "^0.3.7"
 faker = "^24.4.0"
 
 [tool.poetry.group.codespell]

diff --git a/libs/ai-endpoints/tests/integration_tests/test_chat_models.py b/libs/ai-endpoints/tests/integration_tests/test_chat_models.py
@@ -236,6 +236,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_a(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
         llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None  # 4xx Response is falsy
     assert llm._client.last_response.status_code in [400, 422]
     assert "max_tokens" in str(llm._client.last_response.content)
 
@@ -250,6 +251,7 @@ def test_ai_endpoints_invoke_max_tokens_negative_b(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, max_tokens=max_tokens, **mode)
         llm.invoke("Show me the tokens")
+    assert llm._client.last_response is not None  # 4xx Response is falsy
     assert llm._client.last_response.status_code in [400, 422]
     # custom error string -
     #    model inference failed -- ValueError: A requested length of the model output
@@ -306,6 +308,7 @@ def test_ai_endpoints_invoke_seed_default(chat_model: str, mode: dict) -> None:
 def test_ai_endpoints_invoke_seed_range(chat_model: str, mode: dict, seed: int) -> None:
     llm = ChatNVIDIA(model=chat_model, seed=seed, **mode)
     llm.invoke("What's in a seed?")
+    assert llm._client.last_response is not None  # bool(Response) is .ok
     assert llm._client.last_response.status_code == 200
 
 
@@ -332,6 +335,7 @@ def test_ai_endpoints_invoke_temperature_negative(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, temperature=temperature, **mode)
         llm.invoke("What's in a temperature?")
+    assert llm._client.last_response is not None  # 4xx Response is falsy
     assert llm._client.last_response.status_code in [400, 422]
     assert "temperature" in str(llm._client.last_response.content)
 
@@ -360,6 +364,7 @@ def test_ai_endpoints_invoke_top_p_negative(
     with pytest.raises(Exception):
         llm = ChatNVIDIA(model=chat_model, top_p=top_p, **mode)
         llm.invoke("What's in a top_p?")
+    assert llm._client.last_response is not None  # 4xx Response is falsy
     assert llm._client.last_response.status_code in [400, 422]
     assert "top_p" in str(llm._client.last_response.content)
 

diff --git a/libs/ai-endpoints/tests/integration_tests/test_standard.py b/libs/ai-endpoints/tests/integration_tests/test_standard.py
@@ -4,7 +4,8 @@
 
 import pytest
 from langchain_core.language_models import BaseChatModel
-from langchain_standard_tests.integration_tests import ChatModelIntegrationTests
+from langchain_core.tools import BaseTool
+from langchain_tests.integration_tests import ChatModelIntegrationTests
 
 from langchain_nvidia_ai_endpoints import ChatNVIDIA
 
@@ -19,5 +20,25 @@ def chat_model_params(self) -> dict:
         return {"model": "meta/llama-3.1-8b-instruct"}
 
     @pytest.mark.xfail(reason="anthropic-style list content not supported")
-    def test_tool_message_histories_list_content(self, model: BaseChatModel) -> None:
-        return super().test_tool_message_histories_list_content(model)
+    def test_tool_message_histories_list_content(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        return super().test_tool_message_histories_list_content(model, my_adder_tool)
+
+    @pytest.mark.xfail(reason="Empty AIMessage content not supported")
+    def test_tool_message_error_status(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        return super().test_tool_message_error_status(model, my_adder_tool)
+
+    @pytest.mark.xfail(reason="Empty AIMessage content not supported")
+    def test_tool_message_histories_string_content(
+        self, model: BaseChatModel, my_adder_tool: BaseTool
+    ) -> None:
+        return super().test_tool_message_histories_string_content(model, my_adder_tool)
+
+    @pytest.mark.xfail(
+        reason="Only one chunk should set input_tokens, the rest should be 0 or None"
+    )
+    def test_usage_metadata_streaming(self, model: BaseChatModel) -> None:
+        return super().test_usage_metadata_streaming(model)
diff --git a/libs/ai-endpoints/tests/unit_tests/test_standard.py b/libs/ai-endpoints/tests/unit_tests/test_standard.py
@@ -3,7 +3,7 @@
 from typing import Type
 
 from langchain_core.language_models import BaseChatModel
-from langchain_standard_tests.unit_tests import ChatModelUnitTests
+from langchain_tests.unit_tests import ChatModelUnitTests
 
 from langchain_nvidia_ai_endpoints import ChatNVIDIA