Skip to content

Commit

Permalink
partners: add lint docstrings for chroma module (#23249)
Browse files Browse the repository at this point in the history
Description: add lint docstrings for chroma module
Issue: #23188 @baskaryan

test: ruff check passed.


![image](https://github.com/langchain-ai/langchain/assets/76683249/5e168a0c-32d0-464f-8ddb-110233918019)

---------

Co-authored-by: gongwn1 <[email protected]>
  • Loading branch information
Zizo-Vi and gongwn1 authored Jun 21, 2024
1 parent 9eda8f2 commit f9aea3d
Show file tree
Hide file tree
Showing 5 changed files with 168 additions and 22 deletions.
4 changes: 4 additions & 0 deletions libs/partners/chroma/langchain_chroma/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""This is the langchain_chroma package.
It contains the Chroma class for handling various tasks.
"""
from langchain_chroma.vectorstores import Chroma

__all__ = [
Expand Down
43 changes: 32 additions & 11 deletions libs/partners/chroma/langchain_chroma/vectorstores.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""This is the langchain_chroma.vectorstores module.
It contains the Chroma class, which is a vector store for handling various tasks.
"""
from __future__ import annotations

import base64
Expand Down Expand Up @@ -98,7 +102,6 @@ def maximal_marginal_relevance(
Returns:
List of indices of embeddings selected by maximal marginal relevance.
"""

if min(k, len(embedding_list)) <= 0:
return []
if query_embedding.ndim == 1:
Expand Down Expand Up @@ -159,7 +162,7 @@ def __init__(
Args:
collection_name: Name of the collection to create.
embedding_function: Embedding class object. Used to embed texts.
persist_director: Directory to persist the collection.
persist_directory: Directory to persist the collection.
client_settings: Chroma client settings
collection_metadata: Collection configurations.
client: Chroma client. Documentation:
Expand Down Expand Up @@ -223,6 +226,7 @@ def _collection(self) -> chromadb.Collection:

@property
def embeddings(self) -> Optional[Embeddings]:
"""Access the query embedding object."""
return self._embedding_function

@xor_args(("query_texts", "query_embeddings"))
Expand All @@ -245,6 +249,7 @@ def __query_collection(
e.g. {"color" : "red", "price": 4.20}.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of `n_results` nearest neighbor embeddings for provided
Expand Down Expand Up @@ -280,6 +285,7 @@ def add_images(
metadatas: Optional list of metadatas.
When querying, you can filter on this metadata.
ids: Optional list of IDs.
**kwargs: Additional keyword arguments to pass.
Returns:
List of IDs of the added images.
Expand Down Expand Up @@ -367,14 +373,14 @@ def add_texts(
metadatas: Optional list of metadatas.
When querying, you can filter on this metadata.
ids: Optional list of IDs.
**kwargs: Additional keyword arguments.
Returns:
List of IDs of the added texts.
Raises:
ValueError: When metadata is incorrect.
"""

if ids is None:
ids = [str(uuid.uuid4()) for _ in texts]
embeddings = None
Expand Down Expand Up @@ -449,6 +455,7 @@ def similarity_search(
query: Query text to search for.
k: Number of results to return. Defaults to 4.
filter: Filter by metadata. Defaults to None.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of documents most similar to the query text.
Expand All @@ -474,6 +481,7 @@ def similarity_search_by_vector(
filter: Filter by metadata. Defaults to None.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of Documents most similar to the query vector.
Expand All @@ -495,15 +503,15 @@ def similarity_search_by_vector_with_relevance_scores(
where_document: Optional[Dict[str, str]] = None,
**kwargs: Any,
) -> List[Tuple[Document, float]]:
"""
Return docs most similar to embedding vector and similarity score.
"""Return docs most similar to embedding vector and similarity score.
Args:
embedding (List[float]): Embedding to look up documents similar to.
k: Number of Documents to return. Defaults to 4.
filter: Filter by metadata. Defaults to None.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of documents most similar to the query text and relevance score
Expand Down Expand Up @@ -534,6 +542,7 @@ def similarity_search_with_score(
filter: Filter by metadata. Defaults to None.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of documents most similar to the query text and
Expand Down Expand Up @@ -574,7 +583,6 @@ def _select_relevance_score_fn(self) -> Callable[[float], float]:
Raises:
ValueError: If the distance metric is not supported.
"""

if self.override_relevance_score_fn:
return self.override_relevance_score_fn

Expand Down Expand Up @@ -623,11 +631,13 @@ def max_marginal_relevance_search_by_vector(
to maximum diversity and 1 to minimum diversity.
Defaults to 0.5.
filter: Filter by metadata. Defaults to None.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of Documents selected by maximal marginal relevance.
"""

results = self.__query_collection(
query_embeddings=embedding,
n_results=fetch_k,
Expand Down Expand Up @@ -659,6 +669,7 @@ def max_marginal_relevance_search(
**kwargs: Any,
) -> List[Document]:
"""Return docs selected using the maximal marginal relevance.
Maximal marginal relevance optimizes for similarity to query AND diversity
among selected documents.
Expand All @@ -673,6 +684,7 @@ def max_marginal_relevance_search(
filter: Filter by metadata. Defaults to None.
where_document: dict used to filter by the documents.
E.g. {$contains: {"text": "hello"}}.
**kwargs: Additional keyword arguments to pass to Chroma collection query.
Returns:
List of Documents selected by maximal marginal relevance.
Expand Down Expand Up @@ -701,8 +713,10 @@ def delete_collection(self) -> None:
self._chroma_collection = None

def reset_collection(self) -> None:
"""Resets the collection by deleting the collection
and recreating an empty one."""
"""Resets the collection.
Resets the collection by deleting the collection and recreating an empty one.
"""
self.delete_collection()
self.__ensure_collection()

Expand Down Expand Up @@ -827,9 +841,12 @@ def from_texts(
embedding: Embedding function. Defaults to None.
metadatas: List of metadatas. Defaults to None.
ids: List of document IDs. Defaults to None.
client_settings: Chroma client settings
client_settings: Chroma client settings.
client: Chroma client. Documentation:
https://docs.trychroma.com/reference/js-client#class:-chromaclient
collection_metadata: Collection configurations.
Defaults to None.
**kwargs: Additional keyword arguments to initialize a Chroma client.
Returns:
Chroma: Chroma vectorstore.
Expand Down Expand Up @@ -889,9 +906,12 @@ def from_documents(
ids: List of document IDs. Defaults to None.
documents: List of documents to add to the vectorstore.
embedding: Embedding function. Defaults to None.
client_settings: Chroma client settings
client_settings: Chroma client settings.
client: Chroma client. Documentation:
https://docs.trychroma.com/reference/js-client#class:-chromaclient
collection_metadata: Collection configurations.
Defaults to None.
**kwargs: Additional keyword arguments to initialize a Chroma client.
Returns:
Chroma: Chroma vectorstore.
Expand All @@ -916,5 +936,6 @@ def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None:
Args:
ids: List of ids to delete.
**kwargs: Additional keyword arguments.
"""
self._collection.delete(ids=ids)
Loading

0 comments on commit f9aea3d

Please sign in to comment.