Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

doc: fixing docstrings and API docs for gradient #507

Merged
merged 13 commits into from
Mar 5, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -23,16 +23,32 @@ def _alt_progress_bar(x: Any) -> Any:
class GradientDocumentEmbedder:
"""
A component for computing Document embeddings using Gradient AI API.

The embedding of each Document is stored in the `embedding` field of the Document.

Usage example:
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
```python
embedder = GradientDocumentEmbedder(model="bge_large")
p = Pipeline()
p.add_component(embedder, name="document_embedder")
p.add_component(instance=GradientDocumentEmbedder(
p.add_component(instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
p.connect("document_embedder", "document_writer")
p.run({"document_embedder": {"documents": documents}})
from haystack import Pipeline
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack.components.writers import DocumentWriter
from haystack import Document

from haystack_integrations.components.embedders.gradient import GradientDocumentEmbedder

documents = [
Document(content="My name is Jean and I live in Paris."),
Document(content="My name is Mark and I live in Berlin."),
Document(content="My name is Giorgio and I live in Rome."),
]

indexing_pipeline = Pipeline()
indexing_pipeline.add_component(instance=GradientDocumentEmbedder(), name="document_embedder")
indexing_pipeline.add_component(
instance=DocumentWriter(document_store=InMemoryDocumentStore()), name="document_writer")
)
indexing_pipeline.connect("document_embedder", "document_writer")
indexing_pipeline.run({"document_embedder": {"documents": documents}})
>>> {'document_writer': {'documents_written': 3}}
```
"""

Expand All @@ -53,7 +69,7 @@ def __init__(
:param batch_size: Update cycle for tqdm progress bar, default is to update every 32_768 docs.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param progress_bar: Whether to show a progress bar while embedding the documents.
"""
self._batch_size = batch_size
Expand All @@ -75,8 +91,12 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.

:returns:
The serialized component as a dictionary.
"""

return default_to_dict(
self,
model=self._model_name,
Expand All @@ -91,13 +111,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientDocumentEmbedder":
"""
Deserialize this component from a dictionary.

:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down Expand Up @@ -125,9 +149,14 @@ def _generate_embeddings(self, documents: List[Document], batch_size: int) -> Li
def run(self, documents: List[Document]):
"""
Embed a list of Documents.

The embedding of each Document is stored in the `embedding` field of the Document.

:param documents: A list of Documents to embed.
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
:returns:
A dictionary with the following keys:
- `documents`: The embedded Documents.

"""
if not isinstance(documents, list) or documents and any(not isinstance(doc, Document) for doc in documents):
msg = "GradientDocumentEmbedder expects a list of Documents as input.\
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,23 @@
@component
class GradientTextEmbedder:
"""
A component for embedding strings using models hosted on Gradient AI (https://gradient.ai).
A component for embedding strings using models hosted on [Gradient AI](https://gradient.ai).

Usage example:
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
```python
embedder = GradientTextEmbedder(model="bge_large")
from haystack_integrations.components.embedders.gradient import GradientTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Pipeline

embedder =
p = Pipeline()
p.add_component(instance=embedder, name="text_embedder")
p.add_component(instance=InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()), name="retriever")
p.add_component("text_embedder", GradientTextEmbedder(model="bge-large"))
p.add_component("retriever", InMemoryEmbeddingRetriever(document_store=InMemoryDocumentStore()))
p.connect("text_embedder", "retriever")
p.run("embed me!!!")
p.run(data={"text_embedder": {"text":"You can embed me put I'll return no matching documents"}})
>>> No Documents found with embeddings. Returning empty list. To generate embeddings, use a DocumentEmbedder.
>>> {'retriever': {'documents': []}}
```
"""

Expand All @@ -34,7 +42,7 @@ def __init__(
:param model: The name of the model to use.
:param access_token: The Gradient access token.
:param workspace_id: The Gradient workspace ID.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
"""
self._host = host
self._model_name = model
Expand All @@ -53,7 +61,10 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> dict:
"""
Serialize the component to a Python dictionary.
Serialize this component to a dictionary.

:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -67,13 +78,17 @@ def to_dict(self) -> dict:
def from_dict(cls, data: Dict[str, Any]) -> "GradientTextEmbedder":
"""
Deserialize this component from a dictionary.

:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

def warm_up(self) -> None:
"""
Load the embedding model.
Initializes the component.
"""
if not hasattr(self, "_embedding_model"):
self._embedding_model = self._gradient.get_embeddings_model(slug=self._model_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,10 @@ class GradientGenerator:
Queries the LLM using Gradient AI's SDK ('gradientai' package).
See [Gradient AI API](https://docs.gradient.ai/docs/sdk-quickstart) for more details.

Usage example:
```python
from haystack_integrations.components.generators.gradient import GradientGenerator

llm = GradientGenerator(base_model_slug="llama2-7b-chat")
davidsbatista marked this conversation as resolved.
Show resolved Hide resolved
llm.warm_up()
print(llm.run(prompt="What is the meaning of life?"))
Expand All @@ -40,17 +43,17 @@ def __init__(
"""
Create a GradientGenerator component.

:param access_token: The Gradient access token. If not provided it's read from the environment
variable GRADIENT_ACCESS_TOKEN.
:param access_token: The Gradient access token as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_ACCESS_TOKEN`.
:param base_model_slug: The base model slug to use.
:param host: The Gradient host. By default it uses https://api.gradient.ai/.
:param host: The Gradient host. By default, it uses [Gradient AI](https://api.gradient.ai/).
:param max_generated_token_count: The maximum number of tokens to generate.
:param model_adapter_id: The model adapter ID to use.
:param temperature: The temperature to use.
:param top_k: The top k to use.
:param top_p: The top p to use.
:param workspace_id: The Gradient workspace ID. If not provided it's read from the environment
variable GRADIENT_WORKSPACE_ID.
:param workspace_id: The Gradient workspace ID as a `Secret`. If not provided it's read from the environment
variable `GRADIENT_WORKSPACE_ID`.
"""
self._access_token = access_token
self._base_model_slug = base_model_slug
Expand Down Expand Up @@ -84,6 +87,9 @@ def __init__(
def to_dict(self) -> Dict[str, Any]:
"""
Serialize this component to a dictionary.

:returns:
The serialized component as a dictionary.
"""
return default_to_dict(
self,
Expand All @@ -102,7 +108,12 @@ def to_dict(self) -> Dict[str, Any]:
def from_dict(cls, data: Dict[str, Any]) -> "GradientGenerator":
"""
Deserialize this component from a dictionary.

:param data: The dictionary representation of this component.
:returns:
The deserialized component instance.
"""

deserialize_secrets_inplace(data["init_parameters"], keys=["access_token", "workspace_id"])
return default_from_dict(cls, data)

Expand Down