From 0510cd06a008aa7d10b29c0a147dc479dac023ea Mon Sep 17 00:00:00 2001
From: Matthew Farrellee <matt@cs.wisc.edu>
Date: Sat, 23 Mar 2024 07:58:09 -0400
Subject: [PATCH] validate NVIDIAEmbeddings.embed_documents input type is
 list[str]

resolves https://github.com/langchain-ai/langchain-nvidia/issues/8
---
 .../embeddings.py                             |  5 ++
 libs/ai-endpoints/poetry.lock                 | 21 ++++-
 libs/ai-endpoints/pyproject.toml              |  1 +
 .../tests/unit_tests/test_embeddings.py       | 82 +++++++++++++++++++
 4 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 libs/ai-endpoints/tests/unit_tests/test_embeddings.py

diff --git a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
index 9fa8ed41..4bb7628a 100644
--- a/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
+++ b/libs/ai-endpoints/langchain_nvidia_ai_endpoints/embeddings.py
@@ -49,6 +49,11 @@ def embed_query(self, text: str) -> List[float]:
 
     def embed_documents(self, texts: List[str]) -> List[List[float]]:
         """Input pathway for document embeddings."""
+        if not isinstance(texts, list) or not all(
+            isinstance(text, str) for text in texts
+        ):
+            raise ValueError(f"`texts` must be a list of strings, given: {repr(texts)}")
+
         # From https://catalog.ngc.nvidia.com/orgs/nvidia/teams/ai-foundation/models/nvolve-40k/documentation
         # The input must not exceed the 2048 max input characters and inputs above 512
         # model tokens will be truncated. The input array must not exceed 50 input
diff --git a/libs/ai-endpoints/poetry.lock b/libs/ai-endpoints/poetry.lock
index 9e5d0bd3..41839978 100644
--- a/libs/ai-endpoints/poetry.lock
+++ b/libs/ai-endpoints/poetry.lock
@@ -1122,6 +1122,25 @@ urllib3 = ">=1.21.1,<3"
 socks = ["PySocks (>=1.5.6,!=1.5.7)"]
 use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"]
 
+[[package]]
+name = "requests-mock"
+version = "1.11.0"
+description = "Mock out responses from the requests package"
+optional = false
+python-versions = "*"
+files = [
+    {file = "requests-mock-1.11.0.tar.gz", hash = "sha256:ef10b572b489a5f28e09b708697208c4a3b2b89ef80a9f01584340ea357ec3c4"},
+    {file = "requests_mock-1.11.0-py2.py3-none-any.whl", hash = "sha256:f7fae383f228633f6bececebdab236c478ace2284d6292c6e7e2867b9ab74d15"},
+]
+
+[package.dependencies]
+requests = ">=2.3,<3"
+six = "*"
+
+[package.extras]
+fixture = ["fixtures"]
+test = ["fixtures", "mock", "purl", "pytest", "requests-futures", "sphinx", "testtools"]
+
 [[package]]
 name = "ruff"
 version = "0.1.15"
@@ -1409,4 +1428,4 @@ multidict = ">=4.0"
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "28b85aa52e8aaa1cb83dd8dfba132d118628a678bd9a228227a992394fedf7de"
+content-hash = "90a276c531fed21a235d02a914b9bc6ce7425e3666053c184f1a423668d1bf22"
diff --git a/libs/ai-endpoints/pyproject.toml b/libs/ai-endpoints/pyproject.toml
index a0d01c5a..5c1197b2 100644
--- a/libs/ai-endpoints/pyproject.toml
+++ b/libs/ai-endpoints/pyproject.toml
@@ -27,6 +27,7 @@ syrupy = "^4.0.2"
 pytest-watcher = "^0.3.4"
 pytest-asyncio = "^0.21.1"
 langchain-core = "^0.1.5"
+requests-mock = "^1.11.0"
 
 [tool.poetry.group.codespell]
 optional = true
diff --git a/libs/ai-endpoints/tests/unit_tests/test_embeddings.py b/libs/ai-endpoints/tests/unit_tests/test_embeddings.py
new file mode 100644
index 00000000..0cfd96cf
--- /dev/null
+++ b/libs/ai-endpoints/tests/unit_tests/test_embeddings.py
@@ -0,0 +1,82 @@
+from typing import Generator
+
+import pytest
+from requests_mock import Mocker
+
+from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
+
+
+@pytest.fixture
+def embedding(requests_mock: Mocker) -> Generator[NVIDIAEmbeddings, None, None]:
+    """Yield an NVIDIAEmbeddings built against mocked NVCF HTTP endpoints.
+
+    The mocked function listing contains one ACTIVE entry named after the mock
+    model; POSTs to the pexec URL for function "ID" get one canned embedding.
+    """
+    model = "mock-model"
+    function_entry = {
+        "id": "ID",
+        "ncaId": "NCA-ID",
+        "versionId": "VERSION-ID",
+        "name": model,
+        "status": "ACTIVE",
+        "ownedByDifferentAccount": True,
+        "apiBodyFormat": "CUSTOM",
+        "healthUri": "/v2/health/ready",
+        "createdAt": "0000-00-00T00:00:00.000Z",
+    }
+
+    embedding_reply = {
+        "data": [{"embedding": [0.1, 0.2, 0.3], "index": 0}],
+        "usage": {"prompt_tokens": 8, "total_tokens": 8},
+    }
+
+    # Listing endpoint: reports the single mock function defined above.
+    requests_mock.get(
+        "https://api.nvcf.nvidia.com/v2/nvcf/functions",
+        json={"functions": [function_entry]},
+    )
+
+    # Invocation endpoint for function "ID": always answers with the canned reply.
+    requests_mock.post(
+        "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/ID",
+        json=embedding_reply,
+    )
+
+    yield NVIDIAEmbeddings(model=model, nvidia_api_key="a-bogus-key")
+
+
+def test_embed_documents_negative_input_int(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a bare int instead of a list must raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(1)  # type: ignore
+
+
+def test_embed_documents_negative_input_float(embedding: NVIDIAEmbeddings) -> None:
+    """Passing a bare float instead of a list must raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(1.0)  # type: ignore
+
+
+def test_embed_documents_negative_input_str(embedding: NVIDIAEmbeddings) -> None:
+    """A single str is iterable but is not a list, so it must raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents("subscriptable string, not a list")  # type: ignore
+
+
+def test_embed_documents_negative_input_list_int(embedding: NVIDIAEmbeddings) -> None:
+    """A list whose items are all ints must raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents([1, 2, 3])  # type: ignore
+
+
+def test_embed_documents_negative_input_list_float(embedding: NVIDIAEmbeddings) -> None:
+    """A list whose items are all floats must raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents([1.0, 2.0, 3.0])  # type: ignore
+
+
+def test_embed_documents_negative_input_list_mixed(embedding: NVIDIAEmbeddings) -> None:
+    """A list mixing a str with non-str items must still raise ValueError."""
+    with pytest.raises(ValueError):
+        embedding.embed_documents(["1", 2.0, 3])  # type: ignore