From d471261af92dfff33cf6bb81386c82cc38fdb8dd Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 15:47:23 +0200
Subject: [PATCH 1/3] async-documentdb

---
 .../langchain_aws/vectorstores/__init__.py    |  49 ++
 .../vectorstores/documentdb_new.py            | 689 +++++++++++++++++
 libs/aws/poetry.lock                          | 167 +++-
 libs/aws/pyproject.toml                       |   6 +
 .../vectorstores/__init__.py                  |   0
 .../vectorstores/test_documentdb.py           | 728 ++++++++++++++++++
 6 files changed, 1615 insertions(+), 24 deletions(-)
 create mode 100644 libs/aws/langchain_aws/vectorstores/__init__.py
 create mode 100644 libs/aws/langchain_aws/vectorstores/documentdb_new.py
 create mode 100644 libs/aws/tests/integration_tests/vectorstores/__init__.py
 create mode 100644 libs/aws/tests/integration_tests/vectorstores/test_documentdb.py

diff --git a/libs/aws/langchain_aws/vectorstores/__init__.py b/libs/aws/langchain_aws/vectorstores/__init__.py
new file mode 100644
index 00000000..61ad8fb4
--- /dev/null
+++ b/libs/aws/langchain_aws/vectorstores/__init__.py
@@ -0,0 +1,49 @@
+"""**Vector store** stores embedded data and performs vector search.
+
+One of the most common ways to store and search over unstructured data is to
+embed it and store the resulting embedding vectors, and then query the store
+and retrieve the data that are 'most similar' to the embedded query.
+
+**Class hierarchy:**
+
+.. code-block::
+
+    VectorStore --> <name>  # Examples: Annoy, FAISS, Milvus
+
+    BaseRetriever --> VectorStoreRetriever --> <name>Retriever  # Example: VespaRetriever
+
+**Main helpers:**
+
+.. code-block::
+
+    Embeddings, Document
+"""  # noqa: E501
+
+import importlib
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from langchain_core.vectorstores import (
+        VectorStore,  # noqa: F401
+    )
+
+    from langchain_aws.vectorstores.documentdb_new import (
+        DocumentDBVectorSearch,  # noqa: F401
+    )
+
+_module_lookup = {
+    "DocumentDBVectorSearch": "langchain_aws.vectorstores.documentdb_new",
+}
+
+
+def __getattr__(name: str) -> Any:
+    if name in _module_lookup:
+        module = importlib.import_module(_module_lookup[name])
+        return getattr(module, name)
+    raise AttributeError(f"module {__name__} has no attribute {name}")
+
+
+__all__ = list(_module_lookup.keys())
diff --git a/libs/aws/langchain_aws/vectorstores/documentdb_new.py b/libs/aws/langchain_aws/vectorstores/documentdb_new.py
new file mode 100644
index 00000000..a6ea3943
--- /dev/null
+++ b/libs/aws/langchain_aws/vectorstores/documentdb_new.py
@@ -0,0 +1,689 @@
+from __future__ import annotations
+
+import logging
+from enum import Enum
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    List,
+    Optional,
+    TypeVar,
+    Union,
+)
+
+from langchain_core.documents import Document
+from langchain_core.vectorstores import VectorStore
+
+if TYPE_CHECKING:
+    from langchain_core.embeddings import Embeddings
+    from motor.core import AgnosticClient, AgnosticCollection  # noqa: F401
+    from pymongo.collection import Collection
+
+
+# Before Python 3.11 a native StrEnum is not available
+class DocumentDBSimilarityType(str, Enum):
+    """DocumentDB Similarity Type as enumerator."""
+
+    COS = "cosine"
+    """Cosine similarity"""
+    DOT = "dotProduct"
+    """Dot product"""
+    EUC = "euclidean"
+    """Euclidean distance"""
+
+
+DocumentDBDocumentType = TypeVar("DocumentDBDocumentType", bound=Dict[str, Any])
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_INSERT_BATCH_SIZE = 128
+
+
+class DocumentDBVectorSearch(VectorStore):
+    """`Amazon DocumentDB (with MongoDB 
compatibility)` vector store.
+
+    Please refer to the official Vector Search documentation for more details:
+    https://docs.aws.amazon.com/documentdb/latest/developerguide/vector-search.html
+
+    To use, you should have both:
+    - the ``pymongo`` python package installed
+    - a connection string and credentials associated with a DocumentDB cluster
+
+    Example:
+        .. code-block:: python
+
+            from langchain_aws.vectorstores.documentdb_new import (
+                DocumentDBVectorSearch,
+            )
+            from langchain_openai import OpenAIEmbeddings
+            from pymongo import MongoClient
+
+            mongo_client = MongoClient("<YOUR-CONNECTION-STRING>")
+            collection = mongo_client["<db_name>"]["<collection_name>"]
+            embeddings = OpenAIEmbeddings()
+            vectorstore = DocumentDBVectorSearch(collection, embeddings)
+    """
+
+    def __init__(
+        self,
+        collection: Collection[DocumentDBDocumentType],
+        embedding: Embeddings,
+        *,
+        index_name: str = "vectorSearchIndex",
+        text_key: str = "textContent",
+        embedding_key: str = "vectorContent",
+        is_async: bool = False,
+        async_collection: Optional[AgnosticCollection[DocumentDBDocumentType]] = None,
+    ):
+        """Constructor for DocumentDBVectorSearch
+
+        Args:
+            collection: MongoDB collection to add the texts to.
+            embedding: Text embedding model to use.
+            index_name: Name of the Vector Search index.
+            text_key: MongoDB field that will contain the text
+                for each document.
+            embedding_key: MongoDB field that will contain the embedding
+                for each document.
+            is_async: Whether the store should also expose the async API.
+            async_collection: Motor collection backing the async API;
+                required when ``is_async`` is True.
+        """
+        self._collection = collection
+        self._embedding = embedding
+        self._index_name = index_name
+        self._text_key = text_key
+        self._embedding_key = embedding_key
+        self._similarity_type = DocumentDBSimilarityType.COS
+        self.is_async = is_async
+        if is_async and async_collection is None:
+            raise ValueError(
+                "Expecting `async_collection` when `is_async` is True. "
+                f"Got async_collection = `{async_collection}`"
+            )
+        self._async_collection = async_collection
+
+    @property
+    def embeddings(self) -> Embeddings:
+        return self._embedding
+
+    def validate_async(self) -> None:
+        if not self.is_async:
+            raise RuntimeError(
+                "Async methods can only be called when the object was "
+                f"constructed with `is_async=True`. Got is_async = `{self.is_async}`"
+            )
+
+    def get_index_name(self) -> str:
+        """Returns the index name
+
+        Returns:
+            Returns the index name
+
+        """
+        return self._index_name
+
+    @classmethod
+    def from_connection_string(
+        cls,
+        connection_string: str,
+        namespace: str,
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        """Creates an instance of DocumentDBVectorSearch from a connection string
+
+        Args:
+            connection_string: The DocumentDB cluster endpoint connection string
+            namespace: The namespace (database.collection)
+            embedding: The embedding utility
+            **kwargs: Dynamic keyword arguments
+
+        Returns:
+            an instance of the vector store
+
+        """
+        try:
+            from pymongo import MongoClient
+        except ImportError:
+            raise ImportError(
+                "Could not import pymongo, please install it with "
+                "`pip install pymongo`."
+            )
+        client: MongoClient = MongoClient(connection_string)
+        db_name, collection_name = namespace.split(".")
+        collection = client[db_name][collection_name]
+        return cls(collection, embedding, **kwargs)
+
+    @classmethod
+    def afrom_connection_string(
+        cls,
+        connection_string: str,
+        namespace: str,
+        embedding: Embeddings,
+        **kwargs: Any,
+    ) -> "DocumentDBVectorSearch":
+        """Creates an async-enabled instance of DocumentDBVectorSearch from a
+        connection string.
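+
+        Example (an illustrative sketch; assumes ``motor`` is installed and
+        the cluster is reachable):
+
+        .. code-block:: python
+
+            vectorstore = DocumentDBVectorSearch.afrom_connection_string(
+                "<YOUR-CONNECTION-STRING>",
+                "<db_name>.<collection_name>",
+                embeddings,
+            )
+            docs = await vectorstore.asimilarity_search("query", k=4)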
+ + Args: + connection_string: The DocumentDB cluster endpoint connection string + namespace: The namespace (database.collection) + embedding: The embedding utility + **kwargs: Dynamic keyword arguments + + Returns: + an instance of the vector store + """ + try: + from pymongo import MongoClient + except ImportError: + raise ImportError( + "Could not import pymongo, please install it with " + "`pip install pymongo`." + ) + client: MongoClient = MongoClient(connection_string) + try: + from motor.core import AgnosticClient # noqa: F811 F401 + from motor.motor_asyncio import AsyncIOMotorClient + except ImportError: + raise ImportError( + "Could not import motor, please install it with " "`pip install motor`." + ) + async_client: AgnosticClient = AsyncIOMotorClient(connection_string) + db_name, collection_name = namespace.split(".") + collection = client[db_name][collection_name] + async_collection = async_client[db_name][collection_name] + return cls( + collection, + embedding, + is_async=True, + async_collection=async_collection, + **kwargs, + ) + + def index_exists(self) -> bool: + """Verifies if the specified index name during instance + construction exists on the collection + + Returns: + Returns True on success and False if no such index exists + on the collection + """ + cursor = self._collection.list_indexes() + index_name = self._index_name + + for res in cursor: + current_index_name = res.pop("name") + if current_index_name == index_name: + return True + + return False + + async def aindex_exists(self) -> bool: + """Verifies if the specified index name during instance construction + exists on the collection. + + Returns: + Returns True on success and False if no such index exists + on the collection + """ + self.validate_async() + cursor = self._async_collection.list_indexes() + index_name = self._index_name + + async for res in cursor: + current_index_name = res.pop("name") + if current_index_name == index_name: + return True + + return False + + def delete_index(self) -> None: + """Deletes the index specified during instance construction if it exists""" + if self.index_exists(): + self._collection.drop_index(self._index_name) + # Raises OperationFailure on an error (e.g. trying to drop + # an index that does not exist) + + async def adelete_index(self) -> None: + """Deletes the index specified during instance construction if it + exists.""" + self.validate_async() + if await self.aindex_exists(): + await self._async_collection.drop_index(self._index_name) + # Raises OperationFailure on an error (e.g. trying to drop + # an index that does not exist) + + def create_index( + self, + dimensions: int = 1536, + similarity: DocumentDBSimilarityType = DocumentDBSimilarityType.COS, + m: int = 16, + ef_construction: int = 64, + ) -> dict[str, Any]: + """Creates an index using the index name specified at + instance construction + + Args: + dimensions: Number of dimensions for vector similarity. + The maximum number of supported dimensions is 2000 + + similarity: Similarity algorithm to use with the HNSW index. + + m: Specifies the max number of connections for an HNSW index. + Large impact on memory consumption. + + ef_construction: Specifies the size of the dynamic candidate list + for constructing the graph for HNSW index. Higher values lead + to more accurate results but slower indexing speed. + + Possible options are: + - DocumentDBSimilarityType.COS (cosine distance), + - DocumentDBSimilarityType.EUC (Euclidean distance), and + - DocumentDBSimilarityType.DOT (dot product). 
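+
+        Example (an illustrative sketch; ``store`` stands for an
+        already-constructed instance):
+
+        .. code-block:: python
+
+            store.create_index(
+                dimensions=1536,
+                similarity=DocumentDBSimilarityType.COS,
+            )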
+ + Returns: + An object describing the created index + + """ + self._similarity_type = similarity + + # prepare the command + create_index_commands = { + "createIndexes": self._collection.name, + "indexes": [ + { + "name": self._index_name, + "key": {self._embedding_key: "vector"}, + "vectorOptions": { + "type": "hnsw", + "similarity": similarity, + "dimensions": dimensions, + "m": m, + "efConstruction": ef_construction, + }, + } + ], + } + + # retrieve the database object + current_database = self._collection.database + + # invoke the command from the database object + create_index_responses: dict[str, Any] = current_database.command( + create_index_commands + ) + + return create_index_responses + + async def acreate_index( + self, + dimensions: int = 1536, + similarity: DocumentDBSimilarityType = DocumentDBSimilarityType.COS, + m: int = 16, + ef_construction: int = 64, + ) -> dict[str, Any]: + """Creates an index using the index name specified at instance + construction. + + Args: + dimensions: Number of dimensions for vector similarity. + The maximum number of supported dimensions is 2000 + + similarity: Similarity algorithm to use with the HNSW index. + + m: Specifies the max number of connections for an HNSW index. + Large impact on memory consumption. + + ef_construction: Specifies the size of the dynamic candidate list + for constructing the graph for HNSW index. Higher values lead + to more accurate results but slower indexing speed. + + Possible options are: + - DocumentDBSimilarityType.COS (cosine distance), + - DocumentDBSimilarityType.EUC (Euclidean distance), and + - DocumentDBSimilarityType.DOT (dot product). + + Returns: + An object describing the created index + """ + self.validate_async() + self._similarity_type = similarity + + # prepare the command + create_index_commands = { + "createIndexes": self._async_collection.name, + "indexes": [ + { + "name": self._index_name, + "key": {self._embedding_key: "vector"}, + "vectorOptions": { + "type": "hnsw", + "similarity": similarity, + "dimensions": dimensions, + "m": m, + "efConstruction": ef_construction, + }, + } + ], + } + + # retrieve the database object + current_database = self._async_collection.database + + # invoke the command from the database object + create_index_responses: dict[str, Any] = await current_database.command( + create_index_commands + ) + + return create_index_responses + + def add_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + **kwargs: Any, + ) -> List: + batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE) + _metadatas: Union[List, Generator] = metadatas or ({} for _ in texts) + texts_batch = [] + metadatas_batch = [] + result_ids = [] + for i, (text, metadata) in enumerate(zip(texts, _metadatas)): + texts_batch.append(text) + metadatas_batch.append(metadata) + if (i + 1) % batch_size == 0: + result_ids.extend(self._insert_texts(texts_batch, metadatas_batch)) + texts_batch = [] + metadatas_batch = [] + if texts_batch: + result_ids.extend(self._insert_texts(texts_batch, metadatas_batch)) + return result_ids + + async def aadd_texts( + self, + texts: Iterable[str], + metadatas: Optional[List[Dict[str, Any]]] = None, + **kwargs: Any, + ) -> List: + self.validate_async() + batch_size = kwargs.get("batch_size", DEFAULT_INSERT_BATCH_SIZE) + _metadatas: Union[List, Generator] = metadatas or ({} for _ in texts) + texts_batch = [] + metadatas_batch = [] + result_ids = [] + for i, (text, metadata) in enumerate(zip(texts, _metadatas)): + 
texts_batch.append(text)
+            metadatas_batch.append(metadata)
+            if (i + 1) % batch_size == 0:
+                new_result_ids = await self._ainsert_texts(texts_batch, metadatas_batch)
+                result_ids.extend(new_result_ids)
+                texts_batch = []
+                metadatas_batch = []
+        if texts_batch:
+            new_result_ids = await self._ainsert_texts(texts_batch, metadatas_batch)
+            result_ids.extend(new_result_ids)
+        return result_ids
+
+    async def _ainsert_texts(
+        self, texts: List[str], metadatas: List[Dict[str, Any]]
+    ) -> List:
+        """Loads documents into the collection.
+
+        Args:
+            texts: The list of document strings to load
+            metadatas: The list of metadata objects associated with each document
+
+        Returns:
+            The ids of the inserted documents
+        """
+        self.validate_async()
+        # If the text is empty, then exit early
+        if not texts:
+            return []
+
+        # Embed and create the documents; use the async embedding API so the
+        # event loop is not blocked
+        embeddings = await self._embedding.aembed_documents(texts)
+        to_insert = [
+            {self._text_key: t, self._embedding_key: embedding, **m}
+            for t, m, embedding in zip(texts, metadatas, embeddings)
+        ]
+        # insert the documents in DocumentDB
+        insert_result = await self._async_collection.insert_many(to_insert)  # type: ignore
+        return insert_result.inserted_ids
+
+    def _insert_texts(self, texts: List[str], metadatas: List[Dict[str, Any]]) -> List:
+        """Loads documents into the collection
+
+        Args:
+            texts: The list of document strings to load
+            metadatas: The list of metadata objects associated with each document
+
+        Returns:
+            The ids of the inserted documents
+
+        """
+        # If the text is empty, then exit early
+        if not texts:
+            return []
+
+        # Embed and create the documents
+        embeddings = self._embedding.embed_documents(texts)
+        to_insert = [
+            {self._text_key: t, self._embedding_key: embedding, **m}
+            for t, m, embedding in zip(texts, metadatas, embeddings)
+        ]
+        # insert the documents in DocumentDB
+        insert_result = self._collection.insert_many(to_insert)  # type: ignore
+        return insert_result.inserted_ids
+
+    @classmethod
+    def from_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection: Optional[Collection[DocumentDBDocumentType]] = None,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        if collection is None:
+            raise ValueError("Must provide 'collection' named parameter.")
+        vectorstore = cls(collection, embedding, **kwargs)
+        vectorstore.add_texts(texts, metadatas=metadatas)
+        return vectorstore
+
+    @classmethod
+    async def afrom_texts(
+        cls,
+        texts: List[str],
+        embedding: Embeddings,
+        metadatas: Optional[List[dict]] = None,
+        collection: Optional[Collection[DocumentDBDocumentType]] = None,
+        async_collection: Optional[AgnosticCollection[DocumentDBDocumentType]] = None,
+        **kwargs: Any,
+    ) -> DocumentDBVectorSearch:
+        if collection is None or async_collection is None:
+            raise ValueError(
+                "Must provide 'collection' and 'async_collection' named parameters. "
+                f"Got collection: `{collection}`, "
+                f"async_collection: `{async_collection}`"
+            )
+        vectorstore = cls(
+            collection,
+            embedding,
+            is_async=True,
+            async_collection=async_collection,
+            **kwargs,
+        )
+        await vectorstore.aadd_texts(texts, metadatas=metadatas)
+        return vectorstore
+
+    def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> Optional[bool]:
+        if ids is None:
+            raise ValueError("No document ids provided to delete.")
+
+        for document_id in ids:
+            self.delete_document_by_id(document_id)
+        return True
+
+    async def adelete(
+        self, ids: Optional[List[str]] = None, **kwargs: Any
+    ) -> Optional[bool]:
+        self.validate_async()
+        if ids is None:
+            raise ValueError("No document 
ids provided to delete.")
+
+        for document_id in ids:
+            await self.adelete_document_by_id(document_id)
+        return True
+
+    def delete_document_by_id(self, document_id: Optional[str] = None) -> None:
+        """Removes a specific document by id
+
+        Args:
+            document_id: The document identifier
+        """
+        try:
+            from bson.objectid import ObjectId
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import bson; it ships with pymongo, so please "
+                "install it with `pip install pymongo`."
+            ) from e
+        if document_id is None:
+            raise ValueError("No document id provided to delete.")
+
+        self._collection.delete_one({"_id": ObjectId(document_id)})
+
+    async def adelete_document_by_id(self, document_id: Optional[str] = None) -> None:
+        """Removes a specific document by id.
+
+        Args:
+            document_id: The document identifier
+        """
+        self.validate_async()
+        try:
+            from bson.objectid import ObjectId
+        except ImportError as e:
+            raise ImportError(
+                "Unable to import bson; it ships with pymongo, so please "
+                "install it with `pip install pymongo`."
+            ) from e
+        if document_id is None:
+            raise ValueError("No document id provided to delete.")
+
+        await self._async_collection.delete_one({"_id": ObjectId(document_id)})
+
+    def _similarity_search_without_score(
+        self,
+        embeddings: List[float],
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+    ) -> List[Document]:
+        """Returns a list of documents.
+
+        Args:
+            embeddings: The query vector
+            k: the number of documents to return
+            ef_search: Specifies the size of the dynamic candidate list
+                that HNSW index uses during search. A higher value of
+                efSearch provides better recall at cost of speed.
+            agg_operators: Any additional mongodb aggregation operators
+
+        Returns:
+            A list of documents closest to the query vector
+        """
+        pipeline: List[dict[str, Any]] = [
+            {
+                "$search": {
+                    "vectorSearch": {
+                        "vector": embeddings,
+                        "path": self._embedding_key,
+                        "similarity": self._similarity_type,
+                        "k": k,
+                        "efSearch": ef_search,
+                    }
+                }
+            }
+        ]
+        if agg_operators:
+            pipeline = agg_operators + pipeline
+        cursor = self._collection.aggregate(pipeline)
+
+        docs = []
+
+        for res in cursor:
+            text = res.pop(self._text_key)
+            docs.append(Document(page_content=text, metadata=res))
+
+        return docs
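+
+    # ``agg_operators`` lets callers prepend extra aggregation stages to the
+    # vector-search pipeline. An illustrative (hypothetical) pre-filter on a
+    # metadata field might look like:
+    #
+    #     docs = store.similarity_search(
+    #         "query", k=4, agg_operators=[{"$match": {"source": "kb"}}]
+    #     )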
+
+    async def _asimilarity_search_without_score(
+        self,
+        embeddings: List[float],
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+    ) -> List[Document]:
+        """Returns a list of documents.
+
+        Args:
+            embeddings: The query vector
+            k: the number of documents to return
+            ef_search: Specifies the size of the dynamic candidate list
+                that HNSW index uses during search. A higher value of
+                efSearch provides better recall at cost of speed.
+            agg_operators: Any additional mongodb aggregation operators
+
+        Returns:
+            A list of documents closest to the query vector
+        """
+        self.validate_async()
+        pipeline: List[dict[str, Any]] = [
+            {
+                "$search": {
+                    "vectorSearch": {
+                        "vector": embeddings,
+                        "path": self._embedding_key,
+                        "similarity": self._similarity_type,
+                        "k": k,
+                        "efSearch": ef_search,
+                    }
+                }
+            }
+        ]
+        if agg_operators:
+            pipeline = agg_operators + pipeline
+        cursor = self._async_collection.aggregate(pipeline)
+
+        docs = []
+
+        async for res in cursor:
+            text = res.pop(self._text_key)
+            docs.append(Document(page_content=text, metadata=res))
+
+        return docs
+
+    def similarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        embeddings = self._embedding.embed_query(query)
+        docs = self._similarity_search_without_score(
+            embeddings=embeddings,
+            k=k,
+            ef_search=ef_search,
+            agg_operators=agg_operators,
+        )
+        return docs
+
+    async def asimilarity_search(
+        self,
+        query: str,
+        k: int = 4,
+        ef_search: int = 40,
+        agg_operators: List[Dict[str, Any]] | None = None,
+        **kwargs: Any,
+    ) -> List[Document]:
+        self.validate_async()
+        embeddings = await self._embedding.aembed_query(query)
+        docs = await self._asimilarity_search_without_score(
+            embeddings=embeddings,
+            k=k,
+            ef_search=ef_search,
+            agg_operators=agg_operators,
+        )
+        return docs
diff --git a/libs/aws/poetry.lock b/libs/aws/poetry.lock
index bca8815a..3f681263 100644
--- a/libs/aws/poetry.lock
+++ b/libs/aws/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.

[[package]]
name = "annotated-types"

typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.9\""}

[[package]]
name = "boto3"
-version = "1.34.90"
+version = "1.34.92"
description = "The AWS SDK for Python"
optional = false
python-versions = ">=3.8"
files = [
-    {file = "boto3-1.34.90-py3-none-any.whl", hash = "sha256:b2e5cb5b95efcc881e25a3bc872d7a24e75ff4e76f368138e4baf7b9d6ee3422"},
-    {file = "boto3-1.34.90.tar.gz", hash = "sha256:2824e3dd18743ca50e5b10439d20e74647b1416e8a94509cb30beac92d27a18d"},
+    {file = "boto3-1.34.92-py3-none-any.whl", hash = "sha256:db7bbb1c6059e99b74dcf634e497b04addcac4c527ae2b2696e47c39eccc6c50"},
+    {file = "boto3-1.34.92.tar.gz", hash = "sha256:684cba753d64978a486e8ea9645d53de0d4e3b4a3ab1495b26bd04b9541cea2d"},
]

[package.dependencies]
-botocore = ">=1.34.90,<1.35.0"
+botocore = ">=1.34.92,<1.35.0"
jmespath = ">=0.7.1,<2.0.0"
s3transfer = ">=0.10.0,<0.11.0"

[package.extras]
crt = ["botocore[crt] (>=1.21.0,<2.0a0)"]

[[package]]
name = "botocore"
-version = "1.34.90"
+version = "1.34.92"
description = "Low-level, data-driven core of boto 3."
optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.34.90-py3-none-any.whl", hash = "sha256:d48f152498e2c60b43ce25b579d26642346a327b6fb2c632d57219e0a4f63392"}, - {file = "botocore-1.34.90.tar.gz", hash = "sha256:113cd4c0cb63e13163ccbc2bb13d551be314ba7f8ba5bfab1c51a19ca01aa133"}, + {file = "botocore-1.34.92-py3-none-any.whl", hash = "sha256:4211a22a1f6c6935e70cbb84c2cd93b29f9723eaf5036d59748dd104f389a681"}, + {file = "botocore-1.34.92.tar.gz", hash = "sha256:d1ca4886271f184445ec737cd2e752498648cca383887c5a37b2e01c8ab94039"}, ] [package.dependencies] @@ -260,6 +260,26 @@ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.1 [package.extras] toml = ["tomli"] +[[package]] +name = "dnspython" +version = "2.6.1" +description = "DNS toolkit" +optional = true +python-versions = ">=3.8" +files = [ + {file = "dnspython-2.6.1-py3-none-any.whl", hash = "sha256:5ef3b9680161f6fa89daf8ad451b5f1a33b18ae8a1c6778cdf4b43f08c0a6e50"}, + {file = "dnspython-2.6.1.tar.gz", hash = "sha256:e8f0f9c23a7b7cb99ded64e6c3a6f3e701d78f50c55e002b839dea7225cff7cc"}, +] + +[package.extras] +dev = ["black (>=23.1.0)", "coverage (>=7.0)", "flake8 (>=7)", "mypy (>=1.8)", "pylint (>=3)", "pytest (>=7.4)", "pytest-cov (>=4.1.0)", "sphinx (>=7.2.0)", "twine (>=4.0.0)", "wheel (>=0.42.0)"] +dnssec = ["cryptography (>=41)"] +doh = ["h2 (>=4.1.0)", "httpcore (>=1.0.0)", "httpx (>=0.26.0)"] +doq = ["aioquic (>=0.9.25)"] +idna = ["idna (>=3.6)"] +trio = ["trio (>=0.23)"] +wmi = ["wmi (>=1.5.1)"] + [[package]] name = "exceptiongroup" version = "1.2.1" @@ -334,7 +354,7 @@ files = [ [[package]] name = "langchain-core" -version = "0.1.45" +version = "0.1.46" description = "Building applications with LLMs through composability" optional = false python-versions = ">=3.8.1,<4.0" @@ -356,18 +376,18 @@ extended-testing = ["jinja2 (>=3,<4)"] type = "git" url = "https://github.com/langchain-ai/langchain.git" reference = "HEAD" -resolved_reference = "87d31a3ec0d4aeb7fe3af90f00511677c38f3a3b" +resolved_reference = "4c437ebb9c2fb532ce655ac1e0c354c82a715df7" subdirectory = "libs/core" [[package]] name = "langsmith" -version = "0.1.50" +version = "0.1.51" description = "Client library to connect to the LangSmith LLM Tracing and Evaluation Platform." 
optional = false python-versions = "<4.0,>=3.8.1" files = [ - {file = "langsmith-0.1.50-py3-none-any.whl", hash = "sha256:a81e9809fcaa277bfb314d729e58116554f186d1478fcfdf553b1c2ccce54b85"}, - {file = "langsmith-0.1.50.tar.gz", hash = "sha256:9fd22df8c689c044058536ea5af66f5302067e7551b60d7a335fede8d479572b"}, + {file = "langsmith-0.1.51-py3-none-any.whl", hash = "sha256:1e7363a3f472ecf02a1d91f6dbacde25519554b98c490be71716fcffaab0ca6b"}, + {file = "langsmith-0.1.51.tar.gz", hash = "sha256:b99b40a8c00e66174540865caa61412622fa1dc4f02602965364919c90528f97"}, ] [package.dependencies] @@ -375,6 +395,30 @@ orjson = ">=3.9.14,<4.0.0" pydantic = ">=1,<3" requests = ">=2,<3" +[[package]] +name = "motor" +version = "3.4.0" +description = "Non-blocking MongoDB driver for Tornado or asyncio" +optional = true +python-versions = ">=3.7" +files = [ + {file = "motor-3.4.0-py3-none-any.whl", hash = "sha256:4b1e1a0cc5116ff73be2c080a72da078f2bb719b53bc7a6bb9e9a2f7dcd421ed"}, + {file = "motor-3.4.0.tar.gz", hash = "sha256:c89b4e4eb2e711345e91c7c9b122cb68cce0e5e869ed0387dd0acb10775e3131"}, +] + +[package.dependencies] +pymongo = ">=4.5,<5" + +[package.extras] +aws = ["pymongo[aws] (>=4.5,<5)"] +encryption = ["pymongo[encryption] (>=4.5,<5)"] +gssapi = ["pymongo[gssapi] (>=4.5,<5)"] +ocsp = ["pymongo[ocsp] (>=4.5,<5)"] +snappy = ["pymongo[snappy] (>=4.5,<5)"] +srv = ["pymongo[srv] (>=4.5,<5)"] +test = ["aiohttp (!=3.8.6)", "mockupdb", "motor[encryption]", "pytest (>=7)", "tornado (>=5)"] +zstd = ["pymongo[zstd] (>=4.5,<5)"] + [[package]] name = "mypy" version = "1.10.0" @@ -666,6 +710,87 @@ files = [ [package.dependencies] typing-extensions = ">=4.6.0,<4.7.0 || >4.7.0" +[[package]] +name = "pymongo" +version = "4.7.0" +description = "Python driver for MongoDB " +optional = true +python-versions = ">=3.7" +files = [ + {file = "pymongo-4.7.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8449b6af19cac09cce9d0834c196b29b72b29e05724f4ea208b3f602fdd47086"}, + {file = "pymongo-4.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb00787bed1939ef21ffcb09b3034b193c3c6e9838724e2c05ef881cb2b03a33"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8c4cbe5a1258b9f3a49f83781c8b2fb58f39a682779a3c81dc444a609cb15ba"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:12db8e8768bd0d4a433eea3463f05648c3f65f262776c777a0e19e7c55f27a73"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7be2e57df38fa9b1b6f9ebe5bedd38118b511d3bdf0d9e77158c476542c9153d"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4b2b49670b32df8cf6650133cf439593f0291228ce971094c62c3a478024c7d1"}, + {file = "pymongo-4.7.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5366f28b2115120611536914540b0d247a89b09bb80bbc78893f246a584165b9"}, + {file = "pymongo-4.7.0-cp310-cp310-win32.whl", hash = "sha256:6c993fff4c110f6de4d76b76af97733efecae83b688cb27d1a3c5431415e3803"}, + {file = "pymongo-4.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:66b490775aa4542e0585ffdff1d0c6c4279536c852334f34a6a9a5c882beafd4"}, + {file = "pymongo-4.7.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9584be3d20ee26b53c0b1e25ba38196b7f65f594f48211b5ab3fa12b428ec6a9"}, + {file = "pymongo-4.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:db2885773af0c10420e6bb86e84ee780bc3817d45a29ef24d8f6376ae2351eec"}, + {file 
= "pymongo-4.7.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8af3de7fea21b1ced0770766ec37a5900a62b45fe4b8f1dfa521226d591dbf66"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:78b0ba6d60c7f2ac779909ac53383c83584826a304206559599c46a33366622a"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c82105c91cf95821039aca48350630435e7be18989496b6292aaa8779fa5fb6"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44eb2a3adaa0916f2fb6812d4d805956fd376b7fceae3b62f5dfae5e29330786"}, + {file = "pymongo-4.7.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2161278182f3163d15afc3c578097ec20c844ac7180e41134a2a2b5c9ae77b9d"}, + {file = "pymongo-4.7.0-cp311-cp311-win32.whl", hash = "sha256:98cb932ab936d702e28cf8da1982dcf5e7cfc35736b7516c0df7aaa46c63e0e2"}, + {file = "pymongo-4.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:3f1d57edc2a4bd96ae5741e4d83d3d54695174fd9068c88c89e12f7262be4de4"}, + {file = "pymongo-4.7.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:36d05d1ff861dda7c9e84d9848ea6f2b5d2245ae1093865d14597de29ba95b37"}, + {file = "pymongo-4.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0ad32bb7e5f889fc5994001f7bb8bf945b52e10e428a563dfce0661961eae224"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8885f825203fa14ce863b462effcd93e07bfc6e582b3b93cfcde5ae42ccc9923"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cf4187bc91bd10e29857775651101d0ec26e580d6b46a8c5cbf93928358ac3c3"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:aebd99aaea95c48fba24bc3d7b72e7bf70e06df4c647de938c4d3dce2fd25a1c"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:52facf98dcba501b2ae337d21f065cc30ceb25b97ce8f17878c1ae9d781f7f26"}, + {file = "pymongo-4.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f807dadc8030a5b55915f78fac25393af47bee8ccb62b5a6c5c622274ff4adf1"}, + {file = "pymongo-4.7.0-cp312-cp312-win32.whl", hash = "sha256:7a3c9218c5bc4384fa079f41b744473ada6a5f549fc11a4ae0fe7287746acc04"}, + {file = "pymongo-4.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:97ccb53d9310d5963df1a4543f1cfabdfd914638a5c8438234f6ed70d9303222"}, + {file = "pymongo-4.7.0-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:41d647fdaedba2f5b5c92299575814c164af44696fed3a4fc0d0df4f29eabcb2"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f53cf5bf65dda3fc1b5ec5f760233a41b282db3157d135e9272101f0492825f"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6673daf8fc23a96934cbb7a3626dcfa3ae21510492047e6003dfe3f26e62886b"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:16d7fc4891f5482e42c35be6931e9cf6b635d7d95056ff45b56bae5f0384830f"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8fc34b4d92d5d8671be6b728076f275ccfe8495c7e6b74750b634190e17ede68"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:d4d584b249c79acae86729d216a5185d833a90477d566f094b47d39620493870"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b3784063fa43a0019b6a73e1e63b7fcbff4ded4d0ec5442202aa3caa12be9ef8"}, + {file = "pymongo-4.7.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:bd514420eb09bba897016b7f1a2c17f9f3f1a7bc320c0505c59c3225e024b51c"}, + {file = "pymongo-4.7.0-cp37-cp37m-win32.whl", hash = "sha256:31ed6426fc68d500e2f27346e4ce3cc4fd3438adc99a3aaae41578c8a3b1f467"}, + {file = "pymongo-4.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:69865d5739822c277d075a50601077767706e9f0862562e116ef13969d09fc9e"}, + {file = "pymongo-4.7.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:fbad9290b32ff1fc38bcac42699b8ea6a7c49cab081ba54761f3109bc5703248"}, + {file = "pymongo-4.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:5307bfda4f39d9f1b3df9ab96b22d44bca458e44286ce806d716a2ffed2c46da"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2f1a2ee91a97904cd21bddfce58d1868b6ea67b99bdd81dfe9cebfe35d0d751b"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cefa4e9be8bffa80de1bd70ae5ee79973e5db10befabcb25289fb52231a0dcff"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b7b8bd94c63cef8f5bfbb29568934213d9730381db94f467f979c9e5aaa27130"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c8ff95728965e633591862bfc197018d25bc349b5cd8da080acb52a2d17a6e95"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:07265c14aa40259771255dbf59f9160a3690e82522ed02ab07e0e5c3045bad5b"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7214b7599a9f2e4ed01ecdc034cbe8f2926954bfdad9277390dd1bccf9fd6553"}, + {file = "pymongo-4.7.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:1864f224b1793ef8698f779a7808e2b8c4a8f26bd0612c578412f62d6e99be46"}, + {file = "pymongo-4.7.0-cp38-cp38-win32.whl", hash = "sha256:2bfaf7a7eb6a91dfe58f384be16fd895e040d17236ee82217d1be9fc56869dc8"}, + {file = "pymongo-4.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:2545c2be5ed25b1e9419cde4269d6a744076f80eaf86695d2dd888bddac29dd7"}, + {file = "pymongo-4.7.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:e7a00cee5b7a4160eed9cb43a2539037f572f01ed7261c2d1b4f7217060dba61"}, + {file = "pymongo-4.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c85f9824a7e90bf49aeed953e63942bff499116312e555ccb51bd3bf7ebe9342"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:030dba8b3e1cb29f874739247e1eba1d01118a11583c62145c707a6e725d416a"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0dc2e365b14cb768898429e4331c58587be7143ad230858d19e8dd032f0adadc"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:50865177882df0badc879c5b20f20cdc9c73494f0e2b19a40534af9c90018b4e"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c4b0d8393fb991b3dd934e891e064ae804e9267fce9d01d2f16b25e20564e3d"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:7530ea1da6fe0bb1960390ba6523483dfdb2a6239d0e8058b1505cc2a79c75f8"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:36536a41f08180adc647a21ca12dba859a23d841d28ca8fd3976c8781ed8290b"}, + {file = "pymongo-4.7.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.whl", hash = "sha256:b3a49be20a403d86eb1c559350fb56f28a859041756159eeb00e89f59b6e1288"}, + {file = "pymongo-4.7.0-cp39-cp39-win32.whl", hash = "sha256:a292ee4babdd632531effaac95da5f211caafa6a039c097a1b18a4dc0d52488b"}, + {file = "pymongo-4.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:cb809ff53ab3110ebc43a5e47aa945bb97e4ed9bc9beb07f935f5c83d9077e67"}, + {file = "pymongo-4.7.0.tar.gz", hash = "sha256:431093ef808944a14698b2a719b739fa7721778769e80c08423568991aa29c42"}, +] + +[package.dependencies] +dnspython = ">=1.16.0,<3.0.0" + +[package.extras] +aws = ["pymongo-auth-aws (>=1.1.0,<2.0.0)"] +encryption = ["certifi", "pymongo-auth-aws (>=1.1.0,<2.0.0)", "pymongocrypt (>=1.6.0,<2.0.0)"] +gssapi = ["pykerberos", "winkerberos (>=0.5.0)"] +ocsp = ["certifi", "cryptography (>=2.5)", "pyopenssl (>=17.2.0)", "requests (<3.0.0)", "service-identity (>=18.1.0)"] +snappy = ["python-snappy"] +test = ["pytest (>=7)"] +zstd = ["zstandard"] + [[package]] name = "pytest" version = "7.4.4" @@ -750,7 +875,6 @@ files = [ {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69b023b2b4daa7548bcfbd4aa3da05b3a74b772db9e23b982788168117739938"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:81e0b275a9ecc9c0c0c07b4b90ba548307583c125f54d5b6946cfee6360c733d"}, {file = "PyYAML-6.0.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba336e390cd8e4d1739f42dfe9bb83a3cc2e80f567d8805e11b46f4a943f5515"}, - {file = "PyYAML-6.0.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:326c013efe8048858a6d312ddd31d56e468118ad4cdeda36c719bf5bb6192290"}, {file = "PyYAML-6.0.1-cp310-cp310-win32.whl", hash = "sha256:bd4af7373a854424dabd882decdc5579653d7868b8fb26dc7d0e99f823aa5924"}, {file = "PyYAML-6.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:fd1592b3fdf65fff2ad0004b5e363300ef59ced41c2e6b3a99d4089fa8c5435d"}, {file = "PyYAML-6.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:6965a7bc3cf88e5a1c3bd2e0b5c22f8d677dc88a455344035f03399034eb3007"}, @@ -758,15 +882,8 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:42f8152b8dbc4fe7d96729ec2b99c7097d656dc1213a3229ca5383f973a5ed6d"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc"}, {file = "PyYAML-6.0.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d2b04aac4d386b172d5b9692e2d2da8de7bfb6c387fa4f801fbf6fb2e6ba4673"}, - {file = "PyYAML-6.0.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:e7d73685e87afe9f3b36c799222440d6cf362062f78be1013661b00c5c6f678b"}, {file = "PyYAML-6.0.1-cp311-cp311-win32.whl", hash = "sha256:1635fd110e8d85d55237ab316b5b011de701ea0f29d07611174a1b42f1444741"}, {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, - {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, - {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, - {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, - {file = "PyYAML-6.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0d3304d8c0adc42be59c5f8a4d9e3d7379e6955ad754aa9d6ab7a398b59dd1df"}, {file = "PyYAML-6.0.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:50550eb667afee136e9a77d6dc71ae76a44df8b3e51e41b77f6de2932bfe0f47"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1fe35611261b29bd1de0070f0b2f47cb6ff71fa6595c077e42bd0c419fa27b98"}, {file = "PyYAML-6.0.1-cp36-cp36m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:704219a11b772aea0d8ecd7058d0082713c3562b4e271b849ad7dc4a5c90c13c"}, @@ -783,7 +900,6 @@ files = [ {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a0cd17c15d3bb3fa06978b4e8958dcdc6e0174ccea823003a106c7d4d7899ac5"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:28c119d996beec18c05208a8bd78cbe4007878c6dd15091efb73a30e90539696"}, {file = "PyYAML-6.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7e07cbde391ba96ab58e532ff4803f79c4129397514e1413a7dc761ccd755735"}, - {file = "PyYAML-6.0.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:49a183be227561de579b4a36efbb21b3eab9651dd81b1858589f796549873dd6"}, {file = "PyYAML-6.0.1-cp38-cp38-win32.whl", hash = "sha256:184c5108a2aca3c5b3d3bf9395d50893a7ab82a38004c8f61c258d4428e80206"}, {file = "PyYAML-6.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1e2722cc9fbb45d9b87631ac70924c11d3a401b2d7f410cc0e3bbf249f2dca62"}, {file = "PyYAML-6.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9eb6caa9a297fc2c2fb8862bc5370d0303ddba53ba97e71f08023b6cd73d16a8"}, @@ -791,7 +907,6 @@ files = [ {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5773183b6446b2c99bb77e77595dd486303b4faab2b086e7b17bc6bef28865f6"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b786eecbdf8499b9ca1d697215862083bd6d2a99965554781d0d8d1ad31e13a0"}, {file = "PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c"}, - {file = "PyYAML-6.0.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5"}, {file = "PyYAML-6.0.1-cp39-cp39-win32.whl", hash = "sha256:faca3bdcf85b2fc05d06ff3fbc1f83e1391b3e724afa3feba7d13eeab355484c"}, {file = "PyYAML-6.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:510c9deebc5c0225e8c96813043e62b680ba2f9c50a08d3724c7f28a747d1486"}, {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, @@ -994,7 +1109,11 @@ h2 = ["h2 (>=4,<5)"] socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"] zstd = ["zstandard (>=0.18.0)"] +[extras] +motor = ["motor"] +pymongo = ["pymongo"] + [metadata] lock-version = "2.0" python-versions = ">=3.8.1,<4.0" -content-hash = "650e3be56be3f44bc3458c111111a930e76cca8c096f52ccbafe788edcbce630" 
+content-hash = "d4c21b835dd0719271571740c480925765c92b4025d335de8303925293655ab7"
diff --git a/libs/aws/pyproject.toml b/libs/aws/pyproject.toml
index ea5a6862..f2f9f8ed 100644
--- a/libs/aws/pyproject.toml
+++ b/libs/aws/pyproject.toml
@@ -15,6 +15,12 @@ python = ">=3.8.1,<4.0"
langchain-core = "^0.1.45"
boto3 = ">=1.34.51,<1.35.0"
numpy = "^1"
+pymongo = {version = "^4.6.3", optional = true}
+motor = {version = "^3.3.1", optional = true}
+
+[tool.poetry.extras]
+pymongo = ["pymongo"]
+motor = ["motor"]

[tool.poetry.group.test]
optional = true
diff --git a/libs/aws/tests/integration_tests/vectorstores/__init__.py b/libs/aws/tests/integration_tests/vectorstores/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
new file mode 100644
index 00000000..c4f4568f
--- /dev/null
+++ b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
@@ -0,0 +1,728 @@
+"""Test DocumentDBVectorSearch functionality."""
+import logging
+import os
+from asyncio import sleep as asyncio_sleep
+from time import sleep
+from typing import Any, Optional, Tuple
+
+import pytest
+from langchain_core.documents import Document
+from langchain_openai.embeddings import OpenAIEmbeddings
+from motor.core import AgnosticCollection
+from pymongo.collection import Collection
+
+from langchain_aws.vectorstores.documentdb_new import (
+    DocumentDBSimilarityType,
+    DocumentDBVectorSearch,
+)
+
+logging.basicConfig(level=logging.DEBUG)
+
+model_deployment = os.getenv(
+    "OPENAI_EMBEDDINGS_DEPLOYMENT", "smart-agent-embedding-ada"
+)
+model_name = os.getenv("OPENAI_EMBEDDINGS_MODEL_NAME", "text-embedding-ada-002")
+
+INDEX_NAME = "langchain-test-index"
+NAMESPACE = "langchain_test_db.langchain_test_collection"
+CONNECTION_STRING = os.getenv("DOCUMENTDB_URI", "")
+DB_NAME, COLLECTION_NAME = NAMESPACE.split(".")
+
+dimensions = 1536
+similarity_algorithm = DocumentDBSimilarityType.COS
+
+
+def prepare_collection() -> Tuple[Collection, AgnosticCollection]:
+    from motor.motor_asyncio import AsyncIOMotorClient
+    from pymongo import MongoClient
+
+    test_client: MongoClient = MongoClient(CONNECTION_STRING)
+    test_async_client: AsyncIOMotorClient = AsyncIOMotorClient(CONNECTION_STRING)
+    return test_client[DB_NAME][COLLECTION_NAME], test_async_client[DB_NAME][
+        COLLECTION_NAME
+    ]
+
+
+@pytest.fixture()
+def collections() -> Any:
+    return prepare_collection()
+
+
+@pytest.fixture()
+def embedding_openai() -> Any:
+    openai_embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
+        deployment=model_deployment, model=model_name, chunk_size=1
+    )
+    return openai_embeddings
+
+
+"""
+This is how to run the integration tests:
+
+cd libs/aws
+make test TEST_FILE=tests/integration_tests/vectorstores/test_documentdb.py
+
+NOTE: You will first need to follow the contributor setup steps:
+https://python.langchain.com/docs/contributing/code. You will also need to install
+`pymongo` and `motor` via `poetry`. You can also run the test directly using
+`pytest`, but please make sure to install all dependencies.
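+
+Both `DOCUMENTDB_URI` (the cluster connection string) and `OPENAI_API_KEY`
+must be exported before running; they are read at module import and test
+setup time above.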
+"""
+
+
+class TestDocumentDBVectorSearch:
+    @classmethod
+    def setup_class(cls) -> None:
+        if not os.getenv("OPENAI_API_KEY"):
+            raise ValueError("OPENAI_API_KEY environment variable is not set")
+
+        # ensure the test collection is empty
+        collection, async_collection = prepare_collection()
+        assert collection.count_documents({}) == 0  # type: ignore[index]  # noqa: E501
+
+    @classmethod
+    def teardown_class(cls) -> None:
+        collection, async_collection = prepare_collection()
+        # delete all the documents in the collection
+        collection.delete_many({})  # type: ignore[index]
+        collection.drop_indexes()
+
+    @pytest.fixture(autouse=True)
+    def setup(self) -> None:
+        collection, async_collection = prepare_collection()
+        # delete all the documents in the collection
+        collection.delete_many({})  # type: ignore[index]
+        collection.drop_indexes()
+
+    def test_from_documents_cosine_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        """Test end to end construction and search."""
+        documents = [
+            Document(page_content="Dogs are tough.", metadata={"a": 1}),
+            Document(page_content="Cats have fluff.", metadata={"b": 1}),
+            Document(page_content="What is a sandwich?", metadata={"c": 1}),
+            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
+        ]
+
+        collection = collections[0]
+        vectorstore = DocumentDBVectorSearch.from_documents(
+            documents,
+            embedding_openai,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+        sleep(1)  # waits for DocumentDB to save contents to the collection
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, similarity_algorithm)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_documents_cosine_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        """Test end to end construction and search."""
+        documents = [
+            Document(page_content="Dogs are tough.", metadata={"a": 1}),
+            Document(page_content="Cats have fluff.", metadata={"b": 1}),
+            Document(page_content="What is a sandwich?", metadata={"c": 1}),
+            Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}),
+        ]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_documents(
+            documents,
+            embedding_openai,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+        await asyncio_sleep(
+            1
+        )  # waits for DocumentDB to save contents to the collection
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
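+        # metadata supplied at construction should come back on the hit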
+ assert output[0].metadata["c"] == 1 + await vectorstore.adelete_index() + + def test_from_documents_inner_product( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + """Test end to end construction and search.""" + documents = [ + Document(page_content="Dogs are tough.", metadata={"a": 1}), + Document(page_content="Cats have fluff.", metadata={"b": 1}), + Document(page_content="What is a sandwich?", metadata={"c": 1}), + Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + ] + collection = collections[0] + vectorstore = DocumentDBVectorSearch.from_documents( + documents, + embedding_openai, + collection=collection, + index_name=INDEX_NAME, + ) + sleep(1) # waits for DocumentDB to save contents to the collection + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, DocumentDBSimilarityType.DOT) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1, ef_search=100) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + vectorstore.delete_index() + + async def test_afrom_documents_inner_product( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + """Test end to end construction and search.""" + documents = [ + Document(page_content="Dogs are tough.", metadata={"a": 1}), + Document(page_content="Cats have fluff.", metadata={"b": 1}), + Document(page_content="What is a sandwich?", metadata={"c": 1}), + Document(page_content="That fence is purple.", metadata={"d": 1, "e": 2}), + ] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_documents( + documents, + embedding_openai, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + await asyncio_sleep( + 1 + ) # waits for DocumentDB to save contents to the collection + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.DOT) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1, ef_search=100) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + await vectorstore.adelete_index() + + def test_from_texts_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "That fence is purple.", + ] + collection = collections[0] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output[0].page_content == "What is a sandwich?" 
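+        # drop the index so the next test can recreate it cleanly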
+ vectorstore.delete_index() + + async def test_afrom_texts_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "That fence is purple.", + ] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_texts( + texts, + embedding_openai, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, similarity_algorithm) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1) + + assert output[0].page_content == "What is a sandwich?" + await vectorstore.adelete_index() + + def test_from_texts_with_metadatas_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + collection = collections[0] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + + vectorstore.delete_index() + + async def test_afrom_texts_with_metadatas_cosine_distance( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + collection, async_collection = collections + vectorstore = await DocumentDBVectorSearch.afrom_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + async_collection=async_collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + await vectorstore.acreate_index(dimensions, similarity_algorithm) + await asyncio_sleep(2) # waits for the index to be set up + + output = await vectorstore.asimilarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" + assert output[0].metadata["c"] == 1 + + await vectorstore.adelete_index() + + def test_from_texts_with_metadatas_delete_one( + self, embedding_openai: OpenAIEmbeddings, collections: Any + ) -> None: + texts = [ + "Dogs are tough.", + "Cats have fluff.", + "What is a sandwich?", + "The fence is purple.", + ] + collection = collections[0] + metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}] + vectorstore = DocumentDBVectorSearch.from_texts( + texts, + embedding_openai, + metadatas=metadatas, + collection=collection, + index_name=INDEX_NAME, + ) + + # Create the HNSW index that will be leveraged later for vector search + vectorstore.create_index(dimensions, similarity_algorithm) + sleep(2) # waits for the index to be set up + + output = vectorstore.similarity_search("Sandwich", k=1) + + assert output + assert output[0].page_content == "What is a sandwich?" 
+        assert output[0].metadata["c"] == 1
+
+        first_document_id_object = output[0].metadata["_id"]
+        first_document_id = str(first_document_id_object)
+
+        vectorstore.delete_document_by_id(first_document_id)
+        sleep(2)  # waits for the index to be updated
+
+        output2 = vectorstore.similarity_search("Sandwich", k=1, ef_search=10)
+        assert output2
+        assert output2[0].page_content != "What is a sandwich?"
+
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_delete_one(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+
+        first_document_id_object = output[0].metadata["_id"]
+        first_document_id = str(first_document_id_object)
+
+        await vectorstore.adelete_document_by_id(first_document_id)
+        await asyncio_sleep(2)  # waits for the index to be updated
+
+        output2 = await vectorstore.asimilarity_search("Sandwich", k=1, ef_search=10)
+        assert output2
+        assert output2[0].page_content != "What is a sandwich?"
+
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_delete_multiple(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        collection = collections[0]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, similarity_algorithm)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=5)
+
+        first_document_id = str(output[0].metadata["_id"])
+        second_document_id = str(output[1].metadata["_id"])
+        third_document_id = str(output[2].metadata["_id"])
+
+        document_ids = [first_document_id, second_document_id, third_document_id]
+        vectorstore.delete(document_ids)
+        sleep(2)  # waits for the index to be updated
+
+        output_2 = vectorstore.similarity_search("Sandwich", k=5)
+        assert output
+        assert output_2
+
+        assert len(output) == 4  # we should see all the four documents
+        assert (
+            len(output_2) == 1
+        )  # we should see only one document left after three have been deleted
+
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_delete_multiple(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, similarity_algorithm)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=5)
+
+        first_document_id = str(output[0].metadata["_id"])
+        second_document_id = str(output[1].metadata["_id"])
+        third_document_id = str(output[2].metadata["_id"])
+
+        document_ids = [first_document_id, second_document_id, third_document_id]
+        await vectorstore.adelete(document_ids)
+        await asyncio_sleep(2)  # waits for the index to be updated
+
+        output_2 = await vectorstore.asimilarity_search("Sandwich", k=5)
+        assert output
+        assert output_2
+
+        assert len(output) == 4  # we should see all the four documents
+        assert (
+            len(output_2) == 1
+        )  # we should see only one document left after three have been deleted
+
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_inner_product(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection = collections[0]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, DocumentDBSimilarityType.DOT)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_inner_product(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.DOT)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        await vectorstore.adelete_index()
+
+    def test_from_texts_with_metadatas_euclidean_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        collection = collections[0]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        vectorstore = DocumentDBVectorSearch.from_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        vectorstore.create_index(dimensions, DocumentDBSimilarityType.EUC)
+        sleep(2)  # waits for the index to be set up
+
+        output = vectorstore.similarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        vectorstore.delete_index()
+
+    async def test_afrom_texts_with_metadatas_euclidean_distance(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        texts = [
+            "Dogs are tough.",
+            "Cats have fluff.",
+            "What is a sandwich?",
+            "The fence is purple.",
+        ]
+        metadatas = [{"a": 1}, {"b": 1}, {"c": 1}, {"d": 1, "e": 2}]
+        collection, async_collection = collections
+        vectorstore = await DocumentDBVectorSearch.afrom_texts(
+            texts,
+            embedding_openai,
+            metadatas=metadatas,
+            collection=collection,
+            async_collection=async_collection,
+            index_name=INDEX_NAME,
+        )
+
+        # Create the HNSW index that will be leveraged later for vector search
+        await vectorstore.acreate_index(dimensions, DocumentDBSimilarityType.EUC)
+        await asyncio_sleep(2)  # waits for the index to be set up
+
+        output = await vectorstore.asimilarity_search("Sandwich", k=1)
+
+        assert output
+        assert output[0].page_content == "What is a sandwich?"
+        assert output[0].metadata["c"] == 1
+        await vectorstore.adelete_index()
+
+    def invoke_delete_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> Optional[bool]:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.from_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        return vectorstore.delete()
+
+    async def ainvoke_delete_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> Optional[bool]:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.afrom_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        return await vectorstore.adelete()
+
+    def invoke_delete_by_id_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.from_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        vectorstore.delete_document_by_id()
+
+    async def ainvoke_delete_by_id_with_no_args(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        vectorstore: DocumentDBVectorSearch = (
+            DocumentDBVectorSearch.afrom_connection_string(
+                CONNECTION_STRING,
+                NAMESPACE,
+                embedding_openai,
+                index_name=INDEX_NAME,
+            )
+        )
+
+        await vectorstore.adelete_document_by_id()
+
+    def test_invalid_arguments_to_delete(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(ValueError) as exception_info:
+            self.invoke_delete_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document ids provided to delete."
+
+    async def test_ainvalid_arguments_to_delete(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(ValueError) as exception_info:
+            await self.ainvoke_delete_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document ids provided to delete."
+
+    def test_no_arguments_to_delete_by_id(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(Exception) as exception_info:
+            self.invoke_delete_by_id_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document id provided to delete."
+
+    async def test_ano_arguments_to_delete_by_id(
+        self, embedding_openai: OpenAIEmbeddings, collections: Any
+    ) -> None:
+        with pytest.raises(Exception) as exception_info:
+            await self.ainvoke_delete_by_id_with_no_args(embedding_openai, collections)
+        assert str(exception_info.value) == "No document id provided to delete."
From 2b26d5ac9c0f4d7244962873cf9fb6a32ce17b87 Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 16:10:39 +0200
Subject: [PATCH 2/3] going over __init__.py

---
 libs/aws/langchain_aws/__init__.py                            | 2 ++
 .../vectorstores/{documentdb_new.py => documentdb.py}         | 0
 2 files changed, 2 insertions(+)
 rename libs/aws/langchain_aws/vectorstores/{documentdb_new.py => documentdb.py} (100%)

diff --git a/libs/aws/langchain_aws/__init__.py b/libs/aws/langchain_aws/__init__.py
index 68921e44..a19759fe 100644
--- a/libs/aws/langchain_aws/__init__.py
+++ b/libs/aws/langchain_aws/__init__.py
@@ -6,6 +6,7 @@
     AmazonKendraRetriever,
     AmazonKnowledgeBasesRetriever,
 )
+from langchain_aws.vectorstores.documentdb import DocumentDBVectorSearch

 __all__ = [
     "Bedrock",
@@ -13,6 +14,7 @@
     "BedrockLLM",
     "BedrockChat",
     "ChatBedrock",
+    "DocumentDBVectorSearch",
     "SagemakerEndpoint",
     "AmazonKendraRetriever",
     "AmazonKnowledgeBasesRetriever",
diff --git a/libs/aws/langchain_aws/vectorstores/documentdb_new.py b/libs/aws/langchain_aws/vectorstores/documentdb.py
similarity index 100%
rename from libs/aws/langchain_aws/vectorstores/documentdb_new.py
rename to libs/aws/langchain_aws/vectorstores/documentdb.py

From 3f885dbf31742f54c46aa9c1bb24cc3f8d4701c4 Mon Sep 17 00:00:00 2001
From: Alon Parag
Date: Fri, 26 Apr 2024 16:10:55 +0200
Subject: [PATCH 3/3] going over __init__.py

---
 .../aws/tests/integration_tests/vectorstores/test_documentdb.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
index c4f4568f..77afc9e1 100644
--- a/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
+++ b/libs/aws/tests/integration_tests/vectorstores/test_documentdb.py
@@ -11,7 +11,7 @@
 from motor.core import AgnosticCollection
 from pymongo.collection import Collection

-from langchain_aws.vectorstores.documentdb_new import (
+from langchain_aws.vectorstores.documentdb import (
     DocumentDBSimilarityType,
     DocumentDBVectorSearch,
 )