langchain-ai · mattf · Mar 25, 2024 · Mar 23, 2024
diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -49,6 +49,11 @@ def embed_query(self, text: str) -> List[float]:
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Input pathway for document embeddings."""
+        if not isinstance(texts, list) or not all(
+            isinstance(text, str) for text in texts
+        ):
+            raise ValueError(f"`texts` must be a list of strings, given: {repr(texts)}")
+
         # From https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/nvolve-40k/documentation
         # The input must not exceed the 2048 max input characters and inputs above 512
         # model tokens will be truncated. The input array must not exceed 50 input

diff --git a/libs/ai-endpoints/poetry.lock b/libs/ai-endpoints/poetry.lock
diff --git a/libs/ai-endpoints/pyproject.toml b/libs/ai-endpoints/pyproject.toml
@@ -27,6 +27,7 @@ syrupy = "^4.0.2"
 pytest-watcher = "^0.3.4"
 pytest-asyncio = "^0.21.1"
 langchain-core = "^0.1.5"
+requests-mock = "^1.11.0"
 
 [tool.poetry.group.codespell]
 optional = true

diff --git a/libs/ai-endpoints/tests/unit_tests/test_embeddings.py b/libs/ai-endpoints/tests/unit_tests/test_embeddings.py
@@ -0,0 +1,82 @@
+from typing import Generator
+
+import pytest
+from requests_mock import Mocker
+
+from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
+
+
+@pytest.fixture
+def embedding(requests_mock: Mocker) -> Generator[NVIDIAEmbeddings, None, None]:
+    """Provide an NVIDIAEmbeddings client backed by mocked HTTP endpoints.
+
+    A GET mock serves a function listing with one entry named after the mock
+    model, and a POST mock on the `pexec/functions/ID` URL serves a canned
+    embedding response.
+    """
+    mock_model = "mock-model"
+    # Single listing entry; its "name" is the model the client is built with.
+    function_entry = {
+        "id": "ID",
+        "ncaId": "NCA-ID",
+        "versionId": "VERSION-ID",
+        "name": mock_model,
+        "status": "ACTIVE",
+        "ownedByDifferentAccount": True,
+        "apiBodyFormat": "CUSTOM",
+        "healthUri": "/v2/health/ready",
+        "createdAt": "0000-00-00T00:00:00.000Z",
+    }
+    requests_mock.get(
+        "https://api.nvcf.nvidia.com/v2/nvcf/functions",
+        json={"functions": [function_entry]},
+    )
+    # Canned POST response: one three-element embedding plus token usage.
+    embedding_payload = {
+        "data": [
+            {"embedding": [0.1, 0.2, 0.3], "index": 0},
+        ],
+        "usage": {"prompt_tokens": 8, "total_tokens": 8},
+    }
+    requests_mock.post(
+        "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/ID",
+        json=embedding_payload,
+    )
+    # Both mocks are registered before the client object is constructed.
+    yield NVIDIAEmbeddings(model=mock_model, nvidia_api_key="a-bogus-key")
+
+
+def test_embed_documents_negative_input_int(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a bare int as `texts` is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(1)  # type: ignore
+
+
+def test_embed_documents_negative_input_float(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a bare float as `texts` is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(1.0)  # type: ignore
+
+
+def test_embed_documents_negative_input_str(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a single str (not a list) is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents("subscriptable string, not a list")  # type: ignore
+
+
+def test_embed_documents_negative_input_list_int(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a list of ints as `texts` is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents([1, 2, 3])  # type: ignore
+
+
+def test_embed_documents_negative_input_list_float(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a list of floats as `texts` is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents([1.0, 2.0, 3.0])  # type: ignore
+
+
+def test_embed_documents_negative_input_list_mixed(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a list mixing str with non-str items is expected to raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(["1", 2.0, 3])  # type: ignore