diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py index 2ecc01bc3..6bcd94220 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/document_embedder.py @@ -20,12 +20,14 @@ class JinaDocumentEmbedder: Usage example: ```python from haystack import Document - from jina_haystack import JinaDocumentEmbedder + from haystack_integrations.components.embedders.jina import JinaDocumentEmbedder - doc = Document(content="I love pizza!") + # Make sure that the environment variable JINA_API_KEY is set document_embedder = JinaDocumentEmbedder() + doc = Document(content="I love pizza!") + result = document_embedder.run([doc]) print(result['documents'][0].embedding) @@ -46,8 +48,10 @@ def __init__( ): """ Create a JinaDocumentEmbedder component. + :param api_key: The Jina API key. - :param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/` + :param model: The name of the Jina model to use. + Check the list of available models on [Jina documentation](https://jina.ai/embeddings/). :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. :param batch_size: Number of Documents to encode at once. @@ -83,8 +87,9 @@ def _get_telemetry_data(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]: """ - This method overrides the default serializer in order to avoid leaking the `api_key` value passed - to the constructor. + Serializes the component to a dictionary. + :returns: + Dictionary with serialized data. 
""" return default_to_dict( self, @@ -100,6 +105,13 @@ def to_dict(self) -> Dict[str, Any]: @classmethod def from_dict(cls, data: Dict[str, Any]) -> "JinaDocumentEmbedder": + """ + Deserializes the component from a dictionary. + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. + """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @@ -151,10 +163,13 @@ def _embed_batch(self, texts_to_embed: List[str], batch_size: int) -> Tuple[List @component.output_types(documents=List[Document], meta=Dict[str, Any]) def run(self, documents: List[Document]): """ - Embed a list of Documents. - The embedding of each Document is stored in the `embedding` field of the Document. + Compute the embeddings for a list of Documents. :param documents: A list of Documents to embed. + :returns: A dictionary with the following keys: + - `documents`: List of Documents, each with an `embedding` field containing the computed embedding. + - `meta`: A dictionary with metadata including the model name and usage statistics. + :raises TypeError: If the input is not a list of Documents. """ if not isinstance(documents, list) or documents and not isinstance(documents[0], Document): msg = ( diff --git a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py index f99882f13..6398122a4 100644 --- a/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py +++ b/integrations/jina/src/haystack_integrations/components/embedders/jina/text_embedder.py @@ -13,16 +13,18 @@ @component class JinaTextEmbedder: """ - A component for embedding strings using Jina models. + A component for embedding strings using Jina AI models. 
Usage example: ```python - from jina_haystack import JinaTextEmbedder + from haystack_integrations.components.embedders.jina import JinaTextEmbedder - text_to_embed = "I love pizza!" + # Make sure that the environment variable JINA_API_KEY is set text_embedder = JinaTextEmbedder() + text_to_embed = "I love pizza!" + print(text_embedder.run(text_to_embed)) # {'embedding': [0.017020374536514282, -0.023255806416273117, ...], @@ -39,11 +41,12 @@ def __init__( suffix: str = "", ): """ - Create an JinaTextEmbedder component. + Create a JinaTextEmbedder component. :param api_key: The Jina API key. It can be explicitly provided or automatically read from the - environment variable JINA_API_KEY (recommended). - :param model: The name of the Jina model to use. Check the list of available models on `https://jina.ai/embeddings/` + environment variable `JINA_API_KEY` (recommended). + :param model: The name of the Jina model to use. + Check the list of available models on [Jina documentation](https://jina.ai/embeddings/). :param prefix: A string to add to the beginning of each text. :param suffix: A string to add to the end of each text. """ @@ -71,22 +74,37 @@ def _get_telemetry_data(self) -> Dict[str, Any]: def to_dict(self) -> Dict[str, Any]: """ - This method overrides the default serializer in order to avoid leaking the `api_key` value passed - to the constructor. + Serializes the component to a dictionary. + :returns: + Dictionary with serialized data. """ - return default_to_dict( self, api_key=self.api_key.to_dict(), model=self.model_name, prefix=self.prefix, suffix=self.suffix ) @classmethod def from_dict(cls, data: Dict[str, Any]) -> "JinaTextEmbedder": + """ + Deserializes the component from a dictionary. + :param data: + Dictionary to deserialize from. + :returns: + Deserialized component. 
+ """ deserialize_secrets_inplace(data["init_parameters"], keys=["api_key"]) return default_from_dict(cls, data) @component.output_types(embedding=List[float], meta=Dict[str, Any]) def run(self, text: str): - """Embed a string.""" + """ + Embed a string. + + :param text: The string to embed. + :returns: A dictionary with the following keys: + - `embedding`: The embedding of the input string. + - `meta`: A dictionary with metadata including the model name and usage statistics. + :raises TypeError: If the input is not a string. + """ if not isinstance(text, str): msg = ( "JinaTextEmbedder expects a string as an input."