From d471261af92dfff33cf6bb81386c82cc38fdb8dd Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 15:47:23 +0200
Subject: [PATCH 1/3] async-documentdb

---
 .../langchain_aws/vectorstores/__init__.py    |  49 ++
 .../vectorstores/documentdb_new.py            | 689 +++++++++++++++++
 libs/aws/poetry.lock                          | 167 +++-
 libs/aws/pyproject.toml                       |   6 +
 .../vectorstores/__init__.py                  |   0
 .../vectorstores/test_documentdb.py           | 728 ++++++++++++++++++
 6 files changed, 1615 insertions(+), 24 deletions(-)
 create mode 100644 libs/aws/langchain_aws/vectorstores/__init__.py
 create mode 100644 libs/aws/langchain_aws/vectorstores/documentdb_new.py
 create mode 100644 libs/aws/tests/integration_tests/vectorstores/__init__.py
 create mode 100644 libs/aws/tests/integration_tests/vectorstores/test_documentdb.py

diff --git a/libs/aws/langchain_aws/vectorstores/__init__.py b/libs/aws/langchain_aws/vectorstores/__init__.py
new file mode 100644
index 00000000..61ad8fb4
--- /dev/null
+++ b/libs/aws/langchain_aws/vectorstores/__init__.py
@@ -0,0 +1,49 @@
+"""**Vector store** stores embedded data and performs vector search.
+
+One of the most common ways to store and search over unstructured data is to
+embed it and store the resulting embedding vectors, and then query the store
+and retrieve the data that are 'most similar' to the embedded query.
+
+**Class hierarchy:**
+
+.. code-block::
+
+    VectorStore --> <name>  # Examples: Annoy, FAISS, Milvus
+
+    BaseRetriever --> VectorStoreRetriever --> <name>Retriever  # Example: VespaRetriever
+
+**Main helpers:**
+
+.. code-block::
+
+    Embeddings, Document
+"""  # noqa: E501
+
+import importlib
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from langchain_core.vectorstores import (
+        VectorStore,  # noqa: F401
+    )
+
+    from langchain_aws.vectorstores.documentdb_new import (
+        DocumentDBVectorSearch,  # noqa: F401
+    )
+
+_module_lookup = {
+    "DocumentDBVectorSearch": "langchain_aws.vectorstores.documentdb_new",
+}
+
+
+def __getattr__(name: str) -> Any:
+    if name in _module_lookup:
+        module = importlib.import_module(_module_lookup[name])
+        return getattr(module, name)
+    raise AttributeError(f"module {__name__} has no attribute {name}")
+
+
+__all__ = list(_module_lookup.keys())
diff --git a/libs/aws/langchain_aws/vectorstores/documentdb_new.py b/libs/aws/langchain_aws/vectorstores/documentdb_new.py
new file mode 100644
index 00000000..a6ea3943
--- /dev/null
+++ b/libs/aws/langchain_aws/vectorstores/documentdb_new.py
@@ -0,0 +1,689 @@
+from __future__ import annotations
+
+import logging
+from enum import Enum
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+)
+
+from langchain_core.documents import Document
+from langchain_core.vectorstores import VectorStore
+
+if TYPE_CHECKING:
+    from langchain_core.embeddings import Embeddings
+    from motor.core import AgnosticClient, AgnosticCollection  # noqa: F401
+    from pymongo.collection import Collection
+
+
+# Before Python 3.11 a native StrEnum is not available
+class DocumentDBSimilarityType(str, Enum):
+    """DocumentDB Similarity Type as enumerator."""
+
+    COS = "cosine"
+    """Cosine similarity"""
+    DOT = "dotProduct"
+    """Dot product"""
+    EUC = "euclidean"
+    """Euclidean distance"""
+
+
+DocumentDBDocumentType = TypeVar("DocumentDBDocumentType", bound=Dict[str, Any])
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_INSERT_BATCH_SIZE = 128
+
+
+class DocumentDBVectorSearch(VectorStore):
+    """`Amazon DocumentDB (with MongoDB 
compatibility)` vector store.
+
+    Please refer to the official Vector Search documentation for more details:
+    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html
+
+    To use, you should have both:
+    - the ``pymongo`` python package installed
+    - a connection string and credentials associated with a DocumentDB cluster
+
+    Example:
+        .. code-block:: python
+
+            from langchain_aws.vectorstores.documentdb_new import (
+                DocumentDBVectorSearch,
+            )
+            from langchain_openai import OpenAIEmbeddings
+            from pymongo import MongoClient
+
+            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
+            collection = mongo_client["<db_name>"]["<collection_name>"]
+            embeddings = OpenAIEmbeddings()
+            vectorstore = DocumentDBVectorSearch(collection, embeddings)
+    """
+
+    def __init__(
+        self,
+        collection: Collection[DocumentDBDocumentType],
+        embedding: Embeddings,
+        *,
+        index_name: str = "vectorSearchIndex",
+        text_key: str = "textContent",
+        embedding_key: str = "vectorContent",
+        is_async: bool = False,
+        async_collection: Optional[AgnosticCollection[DocumentDBDocumentType]] = None,
+    ):
+        """Constructor for DocumentDBVectorSearch
+
+        Args:
+            collection: MongoDB collection to add the texts to.
+            embedding: Text embedding model to use.
+            index_name: Name of the Vector Search index.
+            text_key: MongoDB field that will contain the text
+                for each document.
+            embedding_key: MongoDB field that will contain the embedding
+                for each document.
+            is_async: Whether the store should also expose the async API.
+            async_collection: Motor collection backing the async API;
+                required when ``is_async`` is True.
+        """
+        self._collection = collection
+        self._embedding = embedding
+        self._index_name = index_name
+        self._text_key = text_key
+        self._embedding_key = embedding_key
+        self._similarity_type = DocumentDBSimilarityType.COS
+        self.is_async = is_async
+        if is_async and async_collection is None:
+            raise ValueError(
+                "Expecting `async_collection` when `is_async` is True. "
+                f"Got async_collection = `{async_collection}`"
+            )
+        self._async_collection = async_collection
+
+    @property
+    def embeddings(self) -> Embeddings:
+        return self._embedding
+
+    def validate_async(self) -> None:
+        if not self.is_async:
+            raise RuntimeError(
+                "Async methods can only be called when the object was "
+                f"constructed with `is_async=True`. Got is_async = `{self.is_async}`"
+            )
+
+    def get_index_name(self) -> str:
+        """Returns the index name
+
+        Returns:
+            Returns the index name
+
+        """
+        return self._index_name
+
+    @classmethod
+    def from_connection_string(
+        cls,
+        connection_string: str,
+        namespace: str,
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        """Creates an instance of DocumentDBVectorSearch from a connection string
+
+        Args:
+            connection_string: The DocumentDB cluster endpoint connection string
+            namespace: The namespace (database.collection)
+            embedding: The embedding utility
+            **kwargs: Dynamic keyword arguments
+
+        Returns:
+            an instance of the vector store
+
+        """
+        try:
+            from pymongo import MongoClient
+        except ImportError:
+            raise ImportError(
+                "Could not import pymongo, please install it with "
+                "`pip install pymongo`."
+            )
+        client: MongoClient = MongoClient(connection_string)
+        db_name, collection_name = namespace.split(".")
+        collection = client[db_name][collection_name]
+        return cls(collection, embedding, **kwargs)
+
+    @classmethod
+    def afrom_connection_string(
+        cls,
+        connection_string: str,
+        namespace: str,
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> "DocumentDBVectorSearch":
+        """Creates an async-enabled instance of DocumentDBVectorSearch from a
+        connection string.
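+
+        Example (an illustrative sketch; assumes ``motor`` is installed and
+        the cluster is reachable):
+
+        .. code-block:: python
+
+            vectorstore = DocumentDBVectorSearch.afrom_connection_string(
+                "<YOUR-CONNECTION-STRING>",
+                "<db_name>.<collection_name>",
+                embeddings,
+            )
+            docs = await vectorstore.asimilarity_search("query", k=4)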
+ + Args: + connection_string: The DocumentDB cluster endpoint connection string + namespace: The namespace (database.collection) + embedding: The embedding utility + **kwargs: Dynamic keyword arguments + + Returns: + an instance of the vector store + """ + try: + from pymongo import MongoClient + except ImportError: + raise ImportError( + "Could not import pymongo, please install it with " + "`pip install pymongo`." + ) + client: MongoClient = MongoClient(connection_string) + try: + from motor.core import AgnosticClient # noqa: F811 F401 + from motor.motor_asyncio import AsyncIOMotorClient + except ImportError: + raise ImportError( + "Could not import motor, please install it with " "`pip install motor`." + ) + async_client: AgnosticClient = AsyncIOMotorClient(connection_string) + db_name, collection_name = namespace.split(".") + collection = client[db_name][collection_name] + async_collection = async_client[db_name][collection_name] + return cls( + collection, + embedding, + is_async=True, + async_collection=async_collection, + **kwargs, + ) + + def index_exists(self) -> bool: + """Verifies if the specified index name during instance + construction exists on the collection + + Returns: + Returns True on success and False if no such index exists + on the collection + """ + cursor = self._collection.list_indexes() + index_name = self._index_name + + for res in cursor: + current_index_name = res.pop("name") + if current_index_name == index_name: + return True + + return False + + async def aindex_exists(self) -> bool: + """Verifies if the specified index name during instance construction + exists on the collection. + + Returns: + Returns True on success and False if no such index exists + on the collection + """ + self.validate_async() + cursor = self._async_collection.list_indexes() + index_name = self._index_name + + async for res in cursor: + current_index_name = res.pop("name") + if current_index_name == index_name: + return True + + return False + + def delete_index(self) -> None: + """Deletes the index specified during instance construction if it exists""" + if self.index_exists(): + self._collection.drop_index(self._index_name) + # Raises OperationFailure on an error (e.g. trying to drop + # an index that does not exist) + + async def adelete_index(self) -> None: + """Deletes the index specified during instance construction if it + exists.""" + self.validate_async() + if await self.aindex_exists(): + await self._async_collection.drop_index(self._index_name) + # Raises OperationFailure on an error (e.g. trying to drop + # an index that does not exist) + + def create_index( + self, + dimensions: int = 1536, + similarity: DocumentDBSimilarityType = DocumentDBSimilarityType.COS, + m: int = 16, + ef_construction: int = 64, + ) -> dict[str, Any]: + """Creates an index using the index name specified at + instance construction + + Args: + dimensions: Number of dimensions for vector similarity. + The maximum number of supported dimensions is 2000 + + similarity: Similarity algorithm to use with the HNSW index. + + m: Specifies the max number of connections for an HNSW index. + Large impact on memory consumption. + + ef_construction: Specifies the size of the dynamic candidate list + for constructing the graph for HNSW index. Higher values lead + to more accurate results but slower indexing speed. + + Possible options are: + - DocumentDBSimilarityType.COS (cosine distance), + - DocumentDBSimilarityType.EUC (Euclidean distance), and + - DocumentDBSimilarityType.DOT (dot product). 
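+
+        Example (an illustrative sketch; ``store`` stands for an
+        already-constructed instance):
+
+        .. code-block:: python
+
+            store.create_index(
+                dimensions=1536,
+                similarity=DocumentDBSimilarityType.COS,
+            )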
+ + Returns: + An object describing the created index + + """ + self._similarity_type = similarity + + # prepare the command + create_index_commands = { + "createIndexes": self._collection.name, + "indexes": [ + { + "name": self._index_name, + "key": {self._embedding_key: "vector"}, + "vectorOptions": { + "type": "hnsw", + "similarity": similarity, + "dimensions": dimensions, + "m": m, + "efConstruction": ef_construction, + }, + } + ], + } + + # retrieve the database object + current_database = self._collection.database + + # invoke the command from the database object + create_index_responses: dict[str, Any] = current_database.command( + create_index_commands + ) + + return create_index_responses + + async def acreate_index( + self, + dimensions: int = 1536, + similarity: DocumentDBSimilarityType = DocumentDBSimilarityType.COS, + m: int = 16, + ef_construction: int = 64, + ) -> dict[str, Any]: + """Creates an index using the index name specified at instance + construction. + + Args: + dimensions: Number of dimensions for vector similarity. + The maximum number of supported dimensions is 2000 + + similarity: Similarity algorithm to use with the HNSW index. + + m: Specifies the max number of connections for an HNSW index. + Large impact on memory consumption. + + ef_construction: Specifies the size of the dynamic candidate list + for constructing the graph for HNSW index. Higher values lead + to more accurate results but slower indexing speed. + + Possible options are: + - DocumentDBSimilarityType.COS (cosine distance), + - DocumentDBSimilarityType.EUC (Euclidean distance), and + - DocumentDBSimilarityType.DOT (dot product). + + Returns: + An object describing the created index + """ + self.validate_async() + self._similarity_type = similarity + + # prepare the command + create_index_commands = { + "createIndexes": self._async_collection.name, + "indexes": [ + { + "name": self._index_name, + "key": {self._embedding_key: "vector"}, + "vectorOptions": { + "type": "hnsw", + "similarity": similarity, + "dimensions": dimensions, + "m": m, + "efConstruction": ef_construction, + }, + } + ], + } + + # retrieve the database object + current_database = self._async_collection.database + + # invoke the command from the database object + create_index_responses: dict[str, Any] = await current_database.command( + create_index_commands + ) + + return create_index_responses + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + **kwargs: Any, + ) -> List: + batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE) + _metadatas: Union[List, Generator] = metadatas or ({} for _ in texts) + texts_batch = [] + metadatas_batch = [] + result_ids = [] + for i, (text, metadata) in enumerate(zip(texts, _metadatas)): + texts_batch.append(text) + metadatas_batch.append(metadata) + if (i + 1) % batch_size == 0: + result_ids.extend(self._insert_texts(texts_batch, metadatas_batch)) + texts_batch = [] + metadatas_batch = [] + if texts_batch: + result_ids.extend(self._insert_texts(texts_batch, metadatas_batch)) + return result_ids + + async def aadd_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + **kwargs: Any, + ) -> List: + self.validate_async() + batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE) + _metadatas: Union[List, Generator] = metadatas or ({} for _ in texts) + texts_batch = [] + metadatas_batch = [] + result_ids = [] + for i, (text, metadata) in enumerate(zip(texts, _metadatas)): + 
texts_batch.append(text)
+            metadatas_batch.append(metadata)
+            if (i + 1) % batch_size == 0:
+                new_result_ids = await self._ainsert_texts(texts_batch, metadatas_batch)
+                result_ids.extend(new_result_ids)
+                texts_batch = []
+                metadatas_batch = []
+        if texts_batch:
+            new_result_ids = await self._ainsert_texts(texts_batch, metadatas_batch)
+            result_ids.extend(new_result_ids)
+        return result_ids
+
+    async def _ainsert_texts(
+        self, texts: List[str], metadatas: List[Dict[str, Any]]
+    ) -> List:
+        """Loads documents into the collection.
+
+        Args:
+            texts: The list of document strings to load
+            metadatas: The list of metadata objects associated with each document
+
+        Returns:
+            The ids of the inserted documents
+        """
+        self.validate_async()
+        # If the text is empty, then exit early
+        if not texts:
+            return []
+
+        # Embed and create the documents; use the async embedding API so the
+        # event loop is not blocked
+        embeddings = await self._embedding.aembed_documents(texts)
+        to_insert = [
+            {self._text_key: t, self._embedding_key: embedding, **m}
+            for t, m, embedding in zip(texts, metadatas, embeddings)
+        ]
+        # insert the documents in DocumentDB
+        insert_result = await self._async_collection.insert_many(to_insert)  # type: ignore
+        return insert_result.inserted_ids
+
+    def _insert_texts(self, texts: List[str], metadatas: List[Dict[str, Any]]) -> List:
+        """Loads documents into the collection
+
+        Args:
+            texts: The list of document strings to load
+            metadatas: The list of metadata objects associated with each document
+
+        Returns:
+            The ids of the inserted documents
+
+        """
+        # If the text is empty, then exit early
+        if not texts:
+            return []
+
+        # Embed and create the documents
+        embeddings = self._embedding.embed_documents(texts)
+        to_insert = [
+            {self._text_key: t, self._embedding_key: embedding, **m}
+            for t, m, embedding in zip(texts, metadatas, embeddings)
+        ]
+        # insert the documents in DocumentDB
+        insert_result = self._collection.insert_many(to_insert)  # type: ignore
+        return insert_result.inserted_ids
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection: Optional[Collection[DocumentDBDocumentType]] = None,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        if collection is None:
+            raise ValueError("Must provide 'collection' named parameter.")
+        vectorstore = cls(collection, embedding, **kwargs)
+        vectorstore.add_texts(texts, metadatas=metadatas)
+        return vectorstore
+
+    @classmethod
+    async def afrom_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection: Optional[Collection[DocumentDBDocumentType]] = None,
+        async_collection: Optional[AgnosticCollection[DocumentDBDocumentType]] = None,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        if collection is None or async_collection is None:
+            raise ValueError(
+                "Must provide 'collection' and 'async_collection' named parameters. "
+                f"Got collection: `{collection}`, "
+                f"async_collection: `{async_collection}`"
+            )
+        vectorstore = cls(
+            collection,
+            embedding,
+            is_async=True,
+            async_collection=async_collection,
+            **kwargs,
+        )
+        await vectorstore.aadd_texts(texts, metadatas=metadatas)
+        return vectorstore
+
+    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
+        if ids is None:
+            raise ValueError("No document ids provided to delete.")
+
+        for document_id in ids:
+            self.delete_document_by_id(document_id)
+        return True
+
+    async def adelete(
+        self, ids: Optional[List[str]] = None, **kwargs: Any
+    ) -> Optional[bool]:
+        self.validate_async()
+        if ids is None:
+            raise ValueError("No document 
ids provided to delete.")
+
+        for document_id in ids:
+            await self.adelete_document_by_id(document_id)
+        return True
+
+    def delete_document_by_id(self, document_id: Optional[str] = None) -> None:
+        """Removes a specific document by id
+
+        Args:
+            document_id: The document identifier
+        """
+        try:
+            from bson.objectid import ObjectId
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import bson; it ships with pymongo, so please "
+                "install it with `pip install pymongo`."
+            ) from e
+        if document_id is None:
+            raise ValueError("No document id provided to delete.")
+
+        self._collection.delete_one({"_id": ObjectId(document_id)})
+
+    async def adelete_document_by_id(self, document_id: Optional[str] = None) -> None:
+        """Removes a specific document by id.
+
+        Args:
+            document_id: The document identifier
+        """
+        self.validate_async()
+        try:
+            from bson.objectid import ObjectId
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import bson; it ships with pymongo, so please "
+                "install it with `pip install pymongo`."
+            ) from e
+        if document_id is None:
+            raise ValueError("No document id provided to delete.")
+
+        await self._async_collection.delete_one({"_id": ObjectId(document_id)})
+
+    def _similarity_search_without_score(
+        self,
+        embeddings: List[float],
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+    ) -> List[Document]:
+        """Returns a list of documents.
+
+        Args:
+            embeddings: The query vector
+            k: the number of documents to return
+            ef_search: Specifies the size of the dynamic candidate list
+                that HNSW index uses during search. A higher value of
+                efSearch provides better recall at cost of speed.
+            agg_operators: Any additional mongodb aggregation operators
+
+        Returns:
+            A list of documents closest to the query vector
+        """
+        pipeline: List[dict[str, Any]] = [
+            {
+                "$search": {
+                    "vectorSearch": {
+                        "vector": embeddings,
+                        "path": self._embedding_key,
+                        "similarity": self._similarity_type,
+                        "k": k,
+                        "efSearch": ef_search,
+                    }
+                }
+            }
+        ]
+        if agg_operators:
+            pipeline = agg_operators + pipeline
+        cursor = self._collection.aggregate(pipeline)
+
+        docs = []
+
+        for res in cursor:
+            text = res.pop(self._text_key)
+            docs.append(Document(page_content=text, metadata=res))
+
+        return docs
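+
+    # ``agg_operators`` lets callers prepend extra aggregation stages to the
+    # vector-search pipeline. An illustrative (hypothetical) pre-filter on a
+    # metadata field might look like:
+    #
+    #     docs = store.similarity_search(
+    #         "query", k=4, agg_operators=[{"$match": {"source": "kb"}}]
+    #     )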
+
+    async def _asimilarity_search_without_score(
+        self,
+        embeddings: List[float],
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+    ) -> List[Document]:
+        """Returns a list of documents.
+
+        Args:
+            embeddings: The query vector
+            k: the number of documents to return
+            ef_search: Specifies the size of the dynamic candidate list
+                that HNSW index uses during search. A higher value of
+                efSearch provides better recall at cost of speed.
+            agg_operators: Any additional mongodb aggregation operators
+
+        Returns:
+            A list of documents closest to the query vector
+        """
+        self.validate_async()
+        pipeline: List[dict[str, Any]] = [
+            {
+                "$search": {
+                    "vectorSearch": {
+                        "vector": embeddings,
+                        "path": self._embedding_key,
+                        "similarity": self._similarity_type,
+                        "k": k,
+                        "efSearch": ef_search,
+                    }
+                }
+            }
+        ]
+        if agg_operators:
+            pipeline = agg_operators + pipeline
+        cursor = self._async_collection.aggregate(pipeline)
+
+        docs = []
+
+        async for res in cursor:
+            text = res.pop(self._text_key)
+            docs.append(Document(page_content=text, metadata=res))
+
+        return docs
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        embeddings = self._embedding.embed_query(query)
+        docs = self._similarity_search_without_score(
+            embeddings=embeddings,
+            k=k,
+            ef_search=ef_search,
+            agg_operators=agg_operators,
+        )
+        return docs
+
+    async def asimilarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        self.validate_async()
+        embeddings = await self._embedding.aembed_query(query)
+        docs = await self._asimilarity_search_without_score(
+            embeddings=embeddings,
+            k=k,
+            ef_search=ef_search,
+            agg_operators=agg_operators,
+        )
+        return docs
diff --git a/libs/aws/poetry.lock b/libs/aws/poetry.lock
index bca8815a..3f681263 100644
--- a/libs/aws/poetry.lock
+++ b/libs/aws/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

[[package]]
name = "annotated-types"

typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}

[[package]]
name = "boto3"
-version = "1.34.90"
+version = "1.34.92"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
files = [
-    {file = "boto3-1.34.90-py3-none-any.whl", hash = "sha256:b2e5cb5b95efcc881e25a3bc872d7a24e75ff4e76f368138e4baf7b9d6ee3422"},
-    {file = "boto3-1.34.90.tar.gz", hash = "sha256:2824e3dd18743ca50e5b10439d20e74647b1416e8a94509cb30beac92d27a18d"},
+    {file = "boto3-1.34.92-py3-none-any.whl", hash = "sha256:db7bbb1c6059e99b74dcf634e497b04addcac4c527ae2b2696e47c39eccc6c50"},
+    {file = "boto3-1.34.92.tar.gz", hash = "sha256:684cba753d64978a486e8ea9645d53de0d4e3b4a3ab1495b26bd04b9541cea2d"},
]

[package.dependencies]
-botocore = ">=1.34.90,<1.35.0"
+botocore = ">=1.34.92,<1.35.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"

[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

[[package]]
name = "botocore"
-version = "1.34.90"
+version = "1.34.92"
description = "Low-level, data-driven core of boto 3."
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.90-py3-none-any.whl", hash = "sha256:d48f152498e2c60b43ce25b579d26642346a327b6fb2c632d57219e0a4f63392"}, - {file = "botocore-1.34.90.tar.gz", hash = "sha256:113cd4c0cb63e13163ccbc2bb13d551be314ba7f8ba5bfab1c51a19ca01aa133"}, + {file = "botocore-1.34.92-py3-none-any.whl", hash = "sha256:4211a22a1f6c6935e70cbb84c2cd93b29f9723eaf5036d59748dd104f389a681"}, + {file = "botocore-1.34.92.tar.gz", hash = "sha256:d1ca4886271f184445ec737cd2e752498648cca383887c5a37b2e01c8ab94039"}, ] [package.dependencies] @@ -260,6 +260,26 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "dnspython" +version = "2.6.1" +description = "DNS toolkit" +optional = true +python-versions = ">=3.8" +files = [ + {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, + {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, +] + +[package.extras] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "sphinx (>=7.2.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=41)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] +doq = ["aioquic (>=0.9.25)"] +idna = ["idna (>=3.6)"] +trio = ["trio (>=0.23)"] +wmi = ["wmi (>=1.5.1)"] + [[package]] name = "exceptiongroup" version = "1.2.1" @@ -334,7 +354,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.45" +version = "0.1.46" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -356,18 +376,18 @@ extended-testing = ["jinja2 (>=3,<4)"] type = "git" url = "https://github.com/langchain-ai/langchain.git" reference = "HEAD" -resolved_reference = "87d31a3ec0d4aeb7fe3af90f00511677c38f3a3b" +resolved_reference = "4c437ebb9c2fb532ce655ac1e0c354c82a715df7" subdirectory = "libs/core" [[package]] name = "langsmith" -version = "0.1.50" +version = "0.1.51" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.50-py3-none-any.whl", hash = "sha256:a81e9809fcaa277bfb314d729e58116554f186d1478fcfdf553b1c2ccce54b85"}, - {file = "langsmith-0.1.50.tar.gz", hash = "sha256:9fd22df8c689c044058536ea5af66f5302067e7551b60d7a335fede8d479572b"}, + {file = "langsmith-0.1.51-py3-none-any.whl", hash = "sha256:1e7363a3f472ecf02a1d91f6dbacde25519554b98c490be71716fcffaab0ca6b"}, + {file = "langsmith-0.1.51.tar.gz", hash = "sha256:b99b40a8c00e66174540865caa61412622fa1dc4f02602965364919c90528f97"}, ] [package.dependencies] @@ -375,6 +395,30 @@ orjson = ">=3.9.14,<4.0.0" pydantic = ">=1,<3" requests = ">=2,<3" +[[package]] +name = "motor" +version = "3.4.0" +description = "Non-blocking MongoDB driver for Tornado or asyncio" +optional = true +python-versions = ">=3.7" +files = [ + {file = "motor-3.4.0-py3-none-any.whl", hash = "sha256:4b1e1a0cc5116ff73be2c080a72da078f2bb719b53bc7a6bb9e9a2f7dcd421ed"}, + {file = "motor-3.4.0.tar.gz", hash = "sha256:c89b4e4eb2e711345e91c7c9b122cb68cce0e5e869ed0387dd0acb10775e3131"}, +] + +[package.dependencies] +pymongo = ">=4.5,<5" + +[package.extras] +aws = ["pymongo[aws] (>=4.5,<5)"] +encryption = ["pymongo[encryption] (>=4.5,<5)"] +gssapi = ["pymongo[gssapi] (>=4.5,<5)"] +ocsp = ["pymongo[ocsp] (>=4.5,<5)"] +snappy = ["pymongo[snappy] (>=4.5,<5)"] +srv = ["pymongo[srv] (>=4.5,<5)"] +test = ["aiohttp (!=3.8.6)", "mockupdb", "motor[encryption]", "pytest (>=7)", "tornado (>=5)"] +zstd = ["pymongo[zstd] (>=4.5,<5)"] + [[package]] name = "mypy" version = "1.10.0" @@ -666,6 +710,87 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pymongo" +version = "4.7.0" +description = "Python driver for MongoDB " +optional = true +python-versions = ">=3.7" +files = [ + {file = "pymongo-4.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8449b6af19cac09cce9d0834c196b29b72b29e05724f4ea208b3f602fdd47086"}, + {file = "pymongo-4.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb00787bed1939ef21ffcb09b3034b193c3c6e9838724e2c05ef881cb2b03a33"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8c4cbe5a1258b9f3a49f83781c8b2fb58f39a682779a3c81dc444a609cb15ba"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12db8e8768bd0d4a433eea3463f05648c3f65f262776c777a0e19e7c55f27a73"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7be2e57df38fa9b1b6f9ebe5bedd38118b511d3bdf0d9e77158c476542c9153d"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b2b49670b32df8cf6650133cf439593f0291228ce971094c62c3a478024c7d1"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5366f28b2115120611536914540b0d247a89b09bb80bbc78893f246a584165b9"}, + {file = "pymongo-4.7.0-cp310-cp310-win32.whl", hash = "sha256:6c993fff4c110f6de4d76b76af97733efecae83b688cb27d1a3c5431415e3803"}, + {file = "pymongo-4.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:66b490775aa4542e0585ffdff1d0c6c4279536c852334f34a6a9a5c882beafd4"}, + {file = "pymongo-4.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9584be3d20ee26b53c0b1e25ba38196b7f65f594f48211b5ab3fa12b428ec6a9"}, + {file = "pymongo-4.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db2885773af0c10420e6bb86e84ee780bc3817d45a29ef24d8f6376ae2351eec"}, + {file 
= "pymongo-4.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8af3de7fea21b1ced0770766ec37a5900a62b45fe4b8f1dfa521226d591dbf66"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78b0ba6d60c7f2ac779909ac53383c83584826a304206559599c46a33366622a"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c82105c91cf95821039aca48350630435e7be18989496b6292aaa8779fa5fb6"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44eb2a3adaa0916f2fb6812d4d805956fd376b7fceae3b62f5dfae5e29330786"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2161278182f3163d15afc3c578097ec20c844ac7180e41134a2a2b5c9ae77b9d"}, + {file = "pymongo-4.7.0-cp311-cp311-win32.whl", hash = "sha256:98cb932ab936d702e28cf8da1982dcf5e7cfc35736b7516c0df7aaa46c63e0e2"}, + {file = "pymongo-4.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:3f1d57edc2a4bd96ae5741e4d83d3d54695174fd9068c88c89e12f7262be4de4"}, + {file = "pymongo-4.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:36d05d1ff861dda7c9e84d9848ea6f2b5d2245ae1093865d14597de29ba95b37"}, + {file = "pymongo-4.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ad32bb7e5f889fc5994001f7bb8bf945b52e10e428a563dfce0661961eae224"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8885f825203fa14ce863b462effcd93e07bfc6e582b3b93cfcde5ae42ccc9923"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf4187bc91bd10e29857775651101d0ec26e580d6b46a8c5cbf93928358ac3c3"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aebd99aaea95c48fba24bc3d7b72e7bf70e06df4c647de938c4d3dce2fd25a1c"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52facf98dcba501b2ae337d21f065cc30ceb25b97ce8f17878c1ae9d781f7f26"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f807dadc8030a5b55915f78fac25393af47bee8ccb62b5a6c5c622274ff4adf1"}, + {file = "pymongo-4.7.0-cp312-cp312-win32.whl", hash = "sha256:7a3c9218c5bc4384fa079f41b744473ada6a5f549fc11a4ae0fe7287746acc04"}, + {file = "pymongo-4.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:97ccb53d9310d5963df1a4543f1cfabdfd914638a5c8438234f6ed70d9303222"}, + {file = "pymongo-4.7.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:41d647fdaedba2f5b5c92299575814c164af44696fed3a4fc0d0df4f29eabcb2"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f53cf5bf65dda3fc1b5ec5f760233a41b282db3157d135e9272101f0492825f"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6673daf8fc23a96934cbb7a3626dcfa3ae21510492047e6003dfe3f26e62886b"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d7fc4891f5482e42c35be6931e9cf6b635d7d95056ff45b56bae5f0384830f"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc34b4d92d5d8671be6b728076f275ccfe8495c7e6b74750b634190e17ede68"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d4d584b249c79acae86729d216a5185d833a90477d566f094b47d39620493870"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b3784063fa43a0019b6a73e1e63b7fcbff4ded4d0ec5442202aa3caa12be9ef8"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bd514420eb09bba897016b7f1a2c17f9f3f1a7bc320c0505c59c3225e024b51c"}, + {file = "pymongo-4.7.0-cp37-cp37m-win32.whl", hash = "sha256:31ed6426fc68d500e2f27346e4ce3cc4fd3438adc99a3aaae41578c8a3b1f467"}, + {file = "pymongo-4.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:69865d5739822c277d075a50601077767706e9f0862562e116ef13969d09fc9e"}, + {file = "pymongo-4.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fbad9290b32ff1fc38bcac42699b8ea6a7c49cab081ba54761f3109bc5703248"}, + {file = "pymongo-4.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5307bfda4f39d9f1b3df9ab96b22d44bca458e44286ce806d716a2ffed2c46da"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f1a2ee91a97904cd21bddfce58d1868b6ea67b99bdd81dfe9cebfe35d0d751b"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cefa4e9be8bffa80de1bd70ae5ee79973e5db10befabcb25289fb52231a0dcff"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7b8bd94c63cef8f5bfbb29568934213d9730381db94f467f979c9e5aaa27130"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8ff95728965e633591862bfc197018d25bc349b5cd8da080acb52a2d17a6e95"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07265c14aa40259771255dbf59f9160a3690e82522ed02ab07e0e5c3045bad5b"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7214b7599a9f2e4ed01ecdc034cbe8f2926954bfdad9277390dd1bccf9fd6553"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1864f224b1793ef8698f779a7808e2b8c4a8f26bd0612c578412f62d6e99be46"}, + {file = "pymongo-4.7.0-cp38-cp38-win32.whl", hash = "sha256:2bfaf7a7eb6a91dfe58f384be16fd895e040d17236ee82217d1be9fc56869dc8"}, + {file = "pymongo-4.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:2545c2be5ed25b1e9419cde4269d6a744076f80eaf86695d2dd888bddac29dd7"}, + {file = "pymongo-4.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e7a00cee5b7a4160eed9cb43a2539037f572f01ed7261c2d1b4f7217060dba61"}, + {file = "pymongo-4.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c85f9824a7e90bf49aeed953e63942bff499116312e555ccb51bd3bf7ebe9342"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:030dba8b3e1cb29f874739247e1eba1d01118a11583c62145c707a6e725d416a"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0dc2e365b14cb768898429e4331c58587be7143ad230858d19e8dd032f0adadc"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50865177882df0badc879c5b20f20cdc9c73494f0e2b19a40534af9c90018b4e"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c4b0d8393fb991b3dd934e891e064ae804e9267fce9d01d2f16b25e20564e3d"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:7530ea1da6fe0bb1960390ba6523483dfdb2a6239d0e8058b1505cc2a79c75f8"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36536a41f08180adc647a21ca12dba859a23d841d28ca8fd3976c8781ed8290b"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b3a49be20a403d86eb1c559350fb56f28a859041756159eeb00e89f59b6e1288"}, + {file = "pymongo-4.7.0-cp39-cp39-win32.whl", hash = "sha256:a292ee4babdd632531effaac95da5f211caafa6a039c097a1b18a4dc0d52488b"}, + {file = "pymongo-4.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb809ff53ab3110ebc43a5e47aa945bb97e4ed9bc9beb07f935f5c83d9077e67"}, + {file = "pymongo-4.7.0.tar.gz", hash = "sha256:431093ef808944a14698b2a719b739fa7721778769e80c08423568991aa29c42"}, +] + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (>=1.1.0,<2.0.0)"] +encryption = ["certifi", "pymongo-auth-aws (>=1.1.0,<2.0.0)", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +test = ["pytest (>=7)"] +zstd = ["zstandard"] + [[package]] name = "pytest" version = "7.4.4" @@ -750,7 +875,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -758,15 +882,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -783,7 +900,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -791,7 +907,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -994,7 +1109,11 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[extras] +motor = ["motor"] +pymongo = ["pymongo"] + [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "650e3be56be3f44bc3458c111111a930e76cca8c096f52ccbafe788edcbce630" 
+content-hash = "d4c21b835dd0719271571740c480925765c92b4025d335de8303925293655ab7"
diff --git a/libs/aws/pyproject.toml b/libs/aws/pyproject.toml
index ea5a6862..f2f9f8ed 100644
--- a/libs/aws/pyproject.toml
+++ b/libs/aws/pyproject.toml
@@ -15,6 +15,12 @@ python = ">=3.8.1,<4.0"
langchain-core = "^0.1.45"
boto3 = ">=1.34.51,<1.35.0"
numpy = "^1"
+pymongo = {version = "^4.6.3", optional = true}
+motor = {version = "^3.3.1", optional = true}
+
+[tool.poetry.extras]
+pymongo = ["pymongo"]
+motor = ["motor"]

[tool.poetry.group.test]
optional = true
diff --git a/libs/aws/tests/integration_tests/vectorstores/__init__.py b/libs/aws/tests/integration_tests/vectorstores/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
new file mode 100644
index 00000000..c4f4568f
--- /dev/null
+++ b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
@@ -0,0 +1,728 @@
+"""Test DocumentDBVectorSearch functionality."""
+import logging
+import os
+from asyncio import sleep as asyncio_sleep
+from time import sleep
+from typing import Any, Optional, Tuple
+
+import pytest
+from langchain_core.documents import Document
+from langchain_openai.embeddings import OpenAIEmbeddings
+from motor.core import AgnosticCollection
+from pymongo.collection import Collection
+
+from langchain_aws.vectorstores.documentdb_new import (
+    DocumentDBSimilarityType,
+    DocumentDBVectorSearch,
+)
+
+logging.basicConfig(level=logging.DEBUG)
+
+model_deployment = os.getenv(
+    "OPENAI_EMBEDDINGS_DEPLOYMENT", "smart-agent-embedding-ada"
+)
+model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
+
+INDEX_NAME = "langchain-test-index"
+NAMESPACE = "langchain_test_db.langchain_test_collection"
+CONNECTION_STRING = os.getenv("DOCUMENTDB_URI", "")
+DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
+
+dimensions = 1536
+similarity_algorithm = DocumentDBSimilarityType.COS
+
+
+def prepare_collection() -> Tuple[Collection, AgnosticCollection]:
+    from motor.motor_asyncio import AsyncIOMotorClient
+    from pymongo import MongoClient
+
+    test_client: MongoClient = MongoClient(CONNECTION_STRING)
+    test_async_client: AsyncIOMotorClient = AsyncIOMotorClient(CONNECTION_STRING)
+    return test_client[DB_NAME][COLLECTION_NAME], test_async_client[DB_NAME][
+        COLLECTION_NAME
+    ]
+
+
+@pytest.fixture()
+def collections() -> Any:
+    return prepare_collection()
+
+
+@pytest.fixture()
+def embedding_openai() -> Any:
+    openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
+        deployment=model_deployment, model=model_name, chunk_size=1
+    )
+    return openai_embeddings
+
+
+"""
+This is how to run the integration tests:
+
+cd libs/aws
+make test TEST_FILE=tests/integration_tests/vectorstores/test_documentdb.py
+
+NOTE: You will first need to follow the contributor setup steps:
+https://python.langchain.com/docs/contributing/code. You will also need to install
+`pymongo` and `motor` via `poetry`. You can also run the test directly using
+`pytest`, but please make sure to install all dependencies.
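+
+Both `DOCUMENTDB_URI` (the cluster connection string) and `OPENAI_API_KEY`
+must be exported before running; they are read at module import and test
+setup time above.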
+"""
+
+
+class TestDocumentDBVectorSearch:
+    @classmethod
+    def setup_class(cls) -> None:
+        if not os.getenv("OPENAI_API_KEY"):
+            raise ValueError("OPENAI_API_KEY environment variable is not set")
+
+        # ensure the test collection is empty
+        collection, async_collection = prepare_collection()
+        assert collection.count_documents({}) == 0  # type: ignore[index]  # noqa: E501
+
+    @classmethod
+    def teardown_class(cls) -> None:
+        collection, async_collection = prepare_collection()
+        # delete all the documents in the collection
+        collection.delete_many({})  # type: ignore[index]
+        collection.drop_indexes()
+
+    @pytest.fixture(autouse=True)
+    def setup(self) -> None:
+        collection, async_collection = prepare_collection()
+        # delete all the documents in the collection
+        collection.delete_many({})  # type: ignore[index]
+        collection.drop_indexes()
+
+    def test_from_documents_cosine_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        """Test end to end construction and search."""
+        documents = [
+            Document(page_content="Dogs are tough.", metadata={"a": 1}),
+            Document(page_content="Cats have fluff.", metadata={"b": 1}),
+            Document(page_content="What is a sandwich?", metadata={"c": 1}),
+            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
+        ]
+
+        collection = collections[0]
+        vectorstore = DocumentDBVectorSearch.from_documents(
+            documents,
+            embedding_openai,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for DocumentDB to save contents to the collection
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, similarity_algorithm)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_documents_cosine_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        """Test end to end construction and search."""
+        documents = [
+            Document(page_content="Dogs are tough.", metadata={"a": 1}),
+            Document(page_content="Cats have fluff.", metadata={"b": 1}),
+            Document(page_content="What is a sandwich?", metadata={"c": 1}),
+            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
+        ]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_documents(
+            documents,
+            embedding_openai,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+        await asyncio_sleep(
+            1
+        )  # waits for DocumentDB to save contents to the collection
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
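+        # metadata supplied at construction should come back on the hit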
+ assert output[0].metadata["c"] == 1 + await vectorstore.adelete_index() + + def test_from_documents_inner_product( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + """Test end to end construction and search.""" + documents = [ + Document(page_content="Dogs are tough.", metadata={"a": 1}), + Document(page_content="Cats have fluff.", metadata={"b": 1}), + Document(page_content="What is a sandwich?", metadata={"c": 1}), + Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + ] + collection = collections[0] + vectorstore = DocumentDBVectorSearch.from_documents( + documents, + embedding_openai, + collection=collection, + index_name=INDEX_NAME, + ) + sleep(1) # waits for DocumentDB to save contents to the collection + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, DocumentDBSimilarityType.DOT) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1, ef_search=100) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + vectorstore.delete_index() + + async def test_afrom_documents_inner_product( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + """Test end to end construction and search.""" + documents = [ + Document(page_content="Dogs are tough.", metadata={"a": 1}), + Document(page_content="Cats have fluff.", metadata={"b": 1}), + Document(page_content="What is a sandwich?", metadata={"c": 1}), + Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + ] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_documents( + documents, + embedding_openai, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + await asyncio_sleep( + 1 + ) # waits for DocumentDB to save contents to the collection + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.DOT) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1, ef_search=100) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + await vectorstore.adelete_index() + + def test_from_texts_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "That fence is purple.", + ] + collection = collections[0] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output[0].page_content == "What is a sandwich?" 
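+        # drop the index so the next test can recreate it cleanly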
+ vectorstore.delete_index() + + async def test_afrom_texts_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "That fence is purple.", + ] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_texts( + texts, + embedding_openai, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, similarity_algorithm) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1) + + assert output[0].page_content == "What is a sandwich?" + await vectorstore.adelete_index() + + def test_from_texts_with_metadatas_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + collection = collections[0] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + + vectorstore.delete_index() + + async def test_afrom_texts_with_metadatas_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, similarity_algorithm) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + + await vectorstore.adelete_index() + + def test_from_texts_with_metadatas_delete_one( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + collection = collections[0] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" 
+        assert output[0].metadata["c"] == 1
+
+        first_document_id_object = output[0].metadata["_id"]
+        first_document_id = str(first_document_id_object)
+
+        vectorstore.delete_document_by_id(first_document_id)
+        sleep(2)  # waits for the index to be updated
+
+        output2 = vectorstore.similarity_search("Sandwich", k=1, ef_search=10)
+        assert output2
+        assert output2[0].page_content != "What is a sandwich?"
+
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_delete_one(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+
+        first_document_id_object = output[0].metadata["_id"]
+        first_document_id = str(first_document_id_object)
+
+        await vectorstore.adelete_document_by_id(first_document_id)
+        await asyncio_sleep(2)  # waits for the index to be updated
+
+        output2 = await vectorstore.asimilarity_search("Sandwich", k=1, ef_search=10)
+        assert output2
+        assert output2[0].page_content != "What is a sandwich?"
+
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_delete_multiple(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        collection = collections[0]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, similarity_algorithm)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=5)
+
+        first_document_id = str(output[0].metadata["_id"])
+        second_document_id = str(output[1].metadata["_id"])
+        third_document_id = str(output[2].metadata["_id"])
+
+        document_ids = [first_document_id, second_document_id, third_document_id]
+        vectorstore.delete(document_ids)
+        sleep(2)  # waits for the index to be updated
+
+        output_2 = vectorstore.similarity_search("Sandwich", k=5)
+        assert output
+        assert output_2
+
+        assert len(output) == 4  # we should see all the four documents
+        assert (
+            len(output_2) == 1
+        )  # we should see only one document left after three have been deleted
+
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_delete_multiple(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=5)
+
+        first_document_id = str(output[0].metadata["_id"])
+        second_document_id = str(output[1].metadata["_id"])
+        third_document_id = str(output[2].metadata["_id"])
+
+        document_ids = [first_document_id, second_document_id, third_document_id]
+        await vectorstore.adelete(document_ids)
+        await asyncio_sleep(2)  # waits for the index to be updated
+
+        output_2 = await vectorstore.asimilarity_search("Sandwich", k=5)
+        assert output
+        assert output_2
+
+        assert len(output) == 4  # we should see all the four documents
+        assert (
+            len(output_2) == 1
+        )  # we should see only one document left after three have been deleted
+
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_inner_product(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection = collections[0]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, DocumentDBSimilarityType.DOT)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_inner_product(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.DOT)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_euclidean_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        collection = collections[0]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, DocumentDBSimilarityType.EUC)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_euclidean_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.EUC)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        await vectorstore.adelete_index()
+
+    def invoke_delete_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> Optional[bool]:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.from_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        return vectorstore.delete()
+
+    async def ainvoke_delete_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> Optional[bool]:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.afrom_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        return await vectorstore.adelete()
+
+    def invoke_delete_by_id_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.from_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        vectorstore.delete_document_by_id()
+
+    async def ainvoke_delete_by_id_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.afrom_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        await vectorstore.adelete_document_by_id()
+
+    def test_invalid_arguments_to_delete(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(ValueError) as exception_info:
+            self.invoke_delete_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document ids provided to delete."
+
+    async def test_ainvalid_arguments_to_delete(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(ValueError) as exception_info:
+            await self.ainvoke_delete_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document ids provided to delete."
+
+    def test_no_arguments_to_delete_by_id(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(Exception) as exception_info:
+            self.invoke_delete_by_id_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document id provided to delete."
+
+    async def test_ano_arguments_to_delete_by_id(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(Exception) as exception_info:
+            await self.ainvoke_delete_by_id_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document id provided to delete."
From 2b26d5ac9c0f4d7244962873cf9fb6a32ce17b87 Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 16:10:39 +0200
Subject: [PATCH 2/3] going over __init__.py

---
 libs/aws/langchain_aws/__init__.py                            | 2 ++
 .../vectorstores/{documentdb_new.py => documentdb.py}         | 0
 2 files changed, 2 insertions(+)
 rename libs/aws/langchain_aws/vectorstores/{documentdb_new.py => documentdb.py} (100%)

diff --git a/libs/aws/langchain_aws/__init__.py b/libs/aws/langchain_aws/__init__.py
index 68921e44..a19759fe 100644
--- a/libs/aws/langchain_aws/__init__.py
+++ b/libs/aws/langchain_aws/__init__.py
@@ -6,6 +6,7 @@
     AmazonKendraRetriever,
     AmazonKnowledgeBasesRetriever,
 )
+from langchain_aws.vectorstores.documentdb import DocumentDBVectorSearch

 __all__ = [
     "Bedrock",
@@ -13,6 +14,7 @@
     "BedrockLLM",
     "BedrockChat",
     "ChatBedrock",
+    "DocumentDBVectorSearch",
     "SagemakerEndpoint",
     "AmazonKendraRetriever",
     "AmazonKnowledgeBasesRetriever",
diff --git a/libs/aws/langchain_aws/vectorstores/documentdb_new.py b/libs/aws/langchain_aws/vectorstores/documentdb.py
similarity index 100%
rename from libs/aws/langchain_aws/vectorstores/documentdb_new.py
rename to libs/aws/langchain_aws/vectorstores/documentdb.py

From 3f885dbf31742f54c46aa9c1bb24cc3f8d4701c4 Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 16:10:55 +0200
Subject: [PATCH 3/3] going over __init__.py

---
 .../aws/tests/integration_tests/vectorstores/test_documentdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
index c4f4568f..77afc9e1 100644
--- a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
+++ b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
@@ -11,7 +11,7 @@
 from motor.core import AgnosticCollection
 from pymongo.collection import Collection

-from langchain_aws.vectorstores.documentdb_new import (
+from langchain_aws.vectorstores.documentdb import (
     DocumentDBSimilarityType,
     DocumentDBVectorSearch,
 )