Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jina - review docstrings #504

Merged
merged 3 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@ class JinaDocumentEmbedder:

Usage example:
```python
import os
from haystack import Document
from jina_haystack import JinaDocumentEmbedder

doc = Document(content="I love pizza!")
from haystack_integrations.components.embedders.jina import JinaDocumentEmbedder

os.environ["JINA_API_KEY"] = "YOUR_JINA_API_KEY"
anakin87 marked this conversation as resolved.
Show resolved Hide resolved
document_embedder = JinaDocumentEmbedder()

doc = Document(content="I love pizza!")

result = document_embedder.run([doc])
print(result['documents'][0].embedding)

Expand All @@ -45,9 +47,9 @@ def __init__(
embedding_separator: str = "\n",
):
"""
Create a JinaDocumentEmbedder component.
anakin87 marked this conversation as resolved.
Show resolved Hide resolved
:param api_key: The Jina API key.
:param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/`
:param model: The name of the Jina model to use.
Check the list of available models on [Jina documentation](https://jina.ai/embeddings/).
:param prefix: A string to add to the beginning of each text.
:param suffix: A string to add to the end of each text.
:param batch_size: Number of Documents to encode at once.
Expand Down Expand Up @@ -83,8 +85,9 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
This method overrides the default serializer in order to avoid leaking the `api_key` value passed
to the constructor.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
"""
return default_to_dict(
self,
Expand All @@ -100,6 +103,13 @@ def to_dict(self) -> Dict[str, Any]:

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentEmbedder":
"""
Deserializes the component from a dictionary.
:param data:
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

Expand Down Expand Up @@ -151,10 +161,13 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List
@component.output_types(documents=List[Document], meta=Dict[str, Any])
def run(self, documents: List[Document]):
"""
Embed a list of Documents.
The embedding of each Document is stored in the `embedding` field of the Document.
Compute the embeddings for a list of Documents.

:param documents: A list of Documents to embed.
:returns: A dictionary with the following keys:
- `documents`: List of Documents, each with an `embedding` field containing the computed embedding.
- `meta`: A dictionary with metadata including the model name and usage statistics.
:raises TypeError: If the input is not a list of Documents.
"""
if not isinstance(documents, list) or documents and not isinstance(documents[0], Document):
msg = (
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,19 @@
@component
class JinaTextEmbedder:
"""
A component for embedding strings using Jina models.
A component for embedding strings using Jina AI models.

Usage example:
```python
from jina_haystack import JinaTextEmbedder
import os
from haystack_integrations.components.embedders.jina import JinaTextEmbedder

text_to_embed = "I love pizza!"
os.environ["JINA_API_KEY"] = "YOUR_JINA_API_KEY"
anakin87 marked this conversation as resolved.
Show resolved Hide resolved

text_embedder = JinaTextEmbedder()

text_to_embed = "I love pizza!"

print(text_embedder.run(text_to_embed))

# {'embedding': [0.017020374536514282, -0.023255806416273117, ...],
Expand All @@ -39,11 +42,10 @@ def __init__(
suffix: str = "",
):
"""
Create a JinaTextEmbedder component.
anakin87 marked this conversation as resolved.
Show resolved Hide resolved

:param api_key: The Jina API key. It can be explicitly provided or automatically read from the
environment variable JINA_API_KEY (recommended).
:param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/`
environment variable `JINA_API_KEY` (recommended).
:param model: The name of the Jina model to use.
Check the list of available models on [Jina documentation](https://jina.ai/embeddings/).
:param prefix: A string to add to the beginning of each text.
:param suffix: A string to add to the end of each text.
"""
Expand Down Expand Up @@ -71,22 +73,37 @@ def _get_telemetry_data(self) -> Dict[str, Any]:

def to_dict(self) -> Dict[str, Any]:
"""
This method overrides the default serializer in order to avoid leaking the `api_key` value passed
to the constructor.
Serializes the component to a dictionary.
:returns:
Dictionary with serialized data.
"""

return default_to_dict(
self, api_key=self.api_key.to_dict(), model=self.model_name, prefix=self.prefix, suffix=self.suffix
)

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> "JinaTextEmbedder":
"""
Deserializes the component from a dictionary.
:param data:
Dictionary to deserialize from.
:returns:
Deserialized component.
"""
deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"])
return default_from_dict(cls, data)

@component.output_types(embedding=List[float], meta=Dict[str, Any])
def run(self, text: str):
"""Embed a string."""
"""
Embed a string.

:param text: The string to embed.
:returns: A dictionary with the following keys:
- `embedding`: The embedding of the input string.
- `meta`: A dictionary with metadata including the model name and usage statistics.
:raises TypeError: If the input is not a string.
"""
if not isinstance(text, str):
msg = (
"JinaTextEmbedder expects a string as an input."
Expand Down