From ad1a1d68d77ea07c67f8a26d4baf5bef7c399937 Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 21:11:24 +0530 Subject: [PATCH 1/5] fix: query in text_embedding_base to work with both Iterable and str as users might supply both --- fastembed/text/text_embedding_base.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fastembed/text/text_embedding_base.py b/fastembed/text/text_embedding_base.py index 669abb5a..819bc733 100644 --- a/fastembed/text/text_embedding_base.py +++ b/fastembed/text/text_embedding_base.py @@ -1,4 +1,4 @@ -from typing import Optional, Union, Iterable, List, Dict, Any +from typing import Any, Dict, Iterable, List, Optional, Union import numpy as np @@ -51,5 +51,7 @@ def query_embed(self, query: str, **kwargs) -> np.ndarray: """ # This is model-specific, so that different models can have specialized implementations - query_embedding = list(self.embed([query], **kwargs))[0] - return query_embedding + if isinstance(query, str): + yield from self.embed([query], **kwargs) + if isinstance(query, Iterable): + yield from self.embed(query, **kwargs) From edf4eff1aafd383362dfda207a175fe01564f3d6 Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 21:15:29 +0530 Subject: [PATCH 2/5] Fix Qdrant query to align with future usage --- docs/examples/Retrieval_with_FastEmbed.ipynb | 22 ++++++------- docs/examples/Usage_With_Qdrant.ipynb | 33 ++++++++++++-------- 2 files changed, 31 insertions(+), 24 deletions(-) diff --git a/docs/examples/Retrieval_with_FastEmbed.ipynb b/docs/examples/Retrieval_with_FastEmbed.ipynb index 9836fd8a..33ba9e97 100644 --- a/docs/examples/Retrieval_with_FastEmbed.ipynb +++ b/docs/examples/Retrieval_with_FastEmbed.ipynb @@ -21,7 +21,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +37,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -105,7 +105,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -138,7 +138,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -148,7 +148,7 @@ "Rank 1: Maharana Pratap was a Rajput warrior king from Mewar\n", "Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule\n", "Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments\n", - "Rank 4: His capital was Chittorgarh, which he lost to the Mughals\n", + "Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar\n", "Rank 5: He fought against the Mughal Empire led by Akbar\n" ] } @@ -166,16 +166,16 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Rank 1: He died in 1597 at the age of 57\n", - "Rank 2: His life has been depicted in various films, TV shows, and books\n", - "Rank 3: Maharana Pratap was a Rajput warrior king from Mewar\n", + "Rank 1: Maharana Pratap was a Rajput warrior king from Mewar\n", + "Rank 2: Maharana Pratap is considered a symbol of Rajput resistance against foreign rule\n", + "Rank 3: His legacy is celebrated in Rajasthan through festivals and monuments\n", "Rank 4: He had 11 wives and 17 sons, including Amar Singh I who succeeded him as ruler of Mewar\n", "Rank 5: He fought against the Mughal Empire led by Akbar\n" ] @@ -213,7 +213,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.5" }, "orig_nbformat": 4 }, diff --git a/docs/examples/Usage_With_Qdrant.ipynb b/docs/examples/Usage_With_Qdrant.ipynb index 91d79861..7eef7fbc 100644 --- a/docs/examples/Usage_With_Qdrant.ipynb +++ b/docs/examples/Usage_With_Qdrant.ipynb @@ -102,19 +102,26 @@ "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 77.7M/77.7M [00:05<00:00, 14.6MiB/s]\n" + ] + }, { "data": { "text/plain": [ - "['6e8fcf7e0ecc407b9b6bb011d169f629',\n", - " 'c9d26e7e0ea741b2b1082d097796b28b',\n", - " 'cf05747e7eb34d2490b1df1f8be94049',\n", - " '208c197266d547a880dfb65e46738b19',\n", - " '27bd985c5d6f49d68fc2cf73dac74199',\n", - " 'c5e929c8837f4370818c97f63996f8ef',\n", - " 'c12213c6cdac470aa2471f2d30dc4041',\n", - " '974e64a7d8624f6e9824fa7b9c94f99d',\n", - " '0129fae193c740eba092512d8e53ab4a',\n", - " '492cad6e741e4aeebb196bd818a97d17']" + "['4fa8b10c78da4b18ba0830ba8a57367a',\n", + " '2eae04b515ee4e9185a9a0e6be812bba',\n", + " 'c6039f88486f47f1835ae3b069c5823c',\n", + " 'c2c8c51e305144d1917b373125fb4d95',\n", + " '79fd23b9ec0648cdab38d1947c6b933e',\n", + " '036aa200d8c3492b8a438e4f825f5e7f',\n", + " 'c35c77f3ea37460a9a13723fb77b7367',\n", + " '6ebccbca571b40d0ab6e83e5e0f2f562',\n", + " '38048c2ccc1d4962a4f8f1bd89c8357a',\n", + " 'c6b09308360140c7b4f106af3658a31e']" ] }, "execution_count": 4, @@ -187,12 +194,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "[QueryResponse(id='42', embedding=None, metadata={'document': 'Qdrant has Langchain integrations', 'source': 'Langchain-docs'}, document='Qdrant has Langchain integrations', score=0.8496814051311954), QueryResponse(id='2', embedding=None, metadata={'document': 'Qdrant also has Llama Index integrations', 'source': 'Linkedin-docs'}, document='Qdrant also has Llama Index integrations', score=0.8478494193031256)]\n" + "[QueryResponse(id=42, embedding=None, metadata={'document': 'Qdrant has Langchain integrations', 'source': 'Langchain-docs'}, document='Qdrant has Langchain integrations', score=0.8276550115796268), QueryResponse(id=2, embedding=None, metadata={'document': 'Qdrant also has Llama Index integrations', 'source': 'Linkedin-docs'}, document='Qdrant also has Llama Index integrations', score=0.8265536935180283)]\n" ] } ], "source": [ - "search_result = client.query(collection_name=\"demo_collection\", query_text=[\"This is a query document\"])\n", + "search_result = client.query(collection_name=\"demo_collection\", query_text=\"This is a query document\")\n", "print(search_result)" ] }, @@ -226,7 +233,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.5" }, "orig_nbformat": 4 }, From 1cfd69236fc3b9ab433162558e4968d05c1b63e9 Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 21:28:06 +0530 Subject: [PATCH 3/5] * refactor(text_embedding_base.py): change query parameter type from str to Union[str, Iterable[str]] in query_embed method --- fastembed/text/text_embedding_base.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fastembed/text/text_embedding_base.py b/fastembed/text/text_embedding_base.py index 819bc733..3c7fb6e1 100644 --- a/fastembed/text/text_embedding_base.py +++ b/fastembed/text/text_embedding_base.py @@ -39,12 +39,12 @@ def passage_embed(self, texts: Iterable[str], **kwargs) -> Iterable[np.ndarray]: # This is model-specific, so that different models can have specialized implementations yield from self.embed(texts, **kwargs) - def query_embed(self, query: str, **kwargs) -> np.ndarray: + def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> np.ndarray: """ - Embeds a query + Embeds queries Args: - query (str): The query to search for. + query (Union[str, Iterable[str]]): The query to embed, or an iterable e.g. list of queries. Returns: np.ndarray: The embeddings. From a69f1f298689b8985028e938b9d64c517512d79d Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 21:36:10 +0530 Subject: [PATCH 4/5] Update return type of query_embed method --- fastembed/text/text_embedding_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastembed/text/text_embedding_base.py b/fastembed/text/text_embedding_base.py index 3c7fb6e1..dc9bd8dd 100644 --- a/fastembed/text/text_embedding_base.py +++ b/fastembed/text/text_embedding_base.py @@ -39,7 +39,7 @@ def passage_embed(self, texts: Iterable[str], **kwargs) -> Iterable[np.ndarray]: # This is model-specific, so that different models can have specialized implementations yield from self.embed(texts, **kwargs) - def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> np.ndarray: + def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> Iterable[np.ndarray]: """ Embeds queries From bb724f0365b5cc88cc046253a60bda55db5c072c Mon Sep 17 00:00:00 2001 From: Nirant Kasliwal Date: Wed, 7 Feb 2024 21:41:33 +0530 Subject: [PATCH 5/5] Update return type in TextEmbeddingBase --- fastembed/text/text_embedding_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fastembed/text/text_embedding_base.py b/fastembed/text/text_embedding_base.py index dc9bd8dd..49ac9804 100644 --- a/fastembed/text/text_embedding_base.py +++ b/fastembed/text/text_embedding_base.py @@ -47,7 +47,7 @@ def query_embed(self, query: Union[str, Iterable[str]], **kwargs) -> Iterable[np query (Union[str, Iterable[str]]): The query to embed, or an iterable e.g. list of queries. Returns: - np.ndarray: The embeddings. + Iterable[np.ndarray]: The embeddings. """ # This is model-specific, so that different models can have specialized implementations