From 8755e4a52c4f6b506a5333968b1189b265d7cfc0 Mon Sep 17 00:00:00 2001 From: paulmartrencharpro <148542350+paulmartrencharpro@users.noreply.github.com> Date: Tue, 19 Nov 2024 17:11:25 +0100 Subject: [PATCH] The name of the model prithvida/Splade_PP_en_v1 has a typo and is being replaced by prithivida/Splade_PP_en_v1. There's a deprecation warning about it. (#1201) I changed it everywhere in the class & the tests --- .../fastembed_sparse_document_embedder.py | 6 ++-- .../fastembed_sparse_text_embedder.py | 6 ++-- ...test_fastembed_sparse_document_embedder.py | 36 +++++++++---------- .../test_fastembed_sparse_text_embedder.py | 32 ++++++++--------- 4 files changed, 40 insertions(+), 40 deletions(-) diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py index f79f08c90..a30d43cf4 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_document_embedder.py @@ -16,7 +16,7 @@ class FastembedSparseDocumentEmbedder: from haystack.dataclasses import Document sparse_doc_embedder = FastembedSparseDocumentEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", batch_size=32, ) @@ -53,7 +53,7 @@ class FastembedSparseDocumentEmbedder: def __init__( self, - model: str = "prithvida/Splade_PP_en_v1", + model: str = "prithivida/Splade_PP_en_v1", cache_dir: Optional[str] = None, threads: Optional[int] = None, batch_size: int = 32, @@ -68,7 +68,7 @@ def __init__( Create an FastembedDocumentEmbedder component. :param model: Local path or name of the model in Hugging Face's model hub, - such as `prithvida/Splade_PP_en_v1`. + such as `prithivida/Splade_PP_en_v1`. 
:param cache_dir: The path to the cache directory. Can be set using the `FASTEMBED_CACHE_PATH` env variable. Defaults to `fastembed_cache` in the system's temp directory. diff --git a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py index 2ebab35b4..c7296525f 100644 --- a/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/src/haystack_integrations/components/embedders/fastembed/fastembed_sparse_text_embedder.py @@ -19,7 +19,7 @@ class FastembedSparseTextEmbedder: "The disk comes and it does not, only Windows. Do Not order this if you have a Mac!!") sparse_text_embedder = FastembedSparseTextEmbedder( - model="prithvida/Splade_PP_en_v1" + model="prithivida/Splade_PP_en_v1" ) sparse_text_embedder.warm_up() @@ -29,7 +29,7 @@ class FastembedSparseTextEmbedder: def __init__( self, - model: str = "prithvida/Splade_PP_en_v1", + model: str = "prithivida/Splade_PP_en_v1", cache_dir: Optional[str] = None, threads: Optional[int] = None, progress_bar: bool = True, @@ -40,7 +40,7 @@ def __init__( """ Create a FastembedSparseTextEmbedder component. - :param model: Local path or name of the model in Fastembed's model hub, such as `prithvida/Splade_PP_en_v1` + :param model: Local path or name of the model in Fastembed's model hub, such as `prithivida/Splade_PP_en_v1` :param cache_dir: The path to the cache directory. Can be set using the `FASTEMBED_CACHE_PATH` env variable. Defaults to `fastembed_cache` in the system's temp directory. 
diff --git a/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py index 90e94908d..7c0de196a 100644 --- a/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_document_embedder.py @@ -15,8 +15,8 @@ def test_init_default(self): """ Test default initialization parameters for FastembedSparseDocumentEmbedder. """ - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None assert embedder.batch_size == 32 @@ -31,7 +31,7 @@ def test_init_with_parameters(self): Test custom initialization parameters for FastembedSparseDocumentEmbedder. """ embedder = FastembedSparseDocumentEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", cache_dir="fake_dir", threads=2, batch_size=64, @@ -41,7 +41,7 @@ def test_init_with_parameters(self): meta_fields_to_embed=["test_field"], embedding_separator=" | ", ) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 assert embedder.batch_size == 64 @@ -55,12 +55,12 @@ def test_to_dict(self): """ Test serialization of FastembedSparseDocumentEmbedder to a dictionary, using default initialization parameters. 
""" - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") embedder_dict = embedder.to_dict() assert embedder_dict == { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": None, "threads": None, "batch_size": 32, @@ -78,7 +78,7 @@ def test_to_dict_with_custom_init_parameters(self): Test serialization of FastembedSparseDocumentEmbedder to a dictionary, using custom initialization parameters. """ embedder = FastembedSparseDocumentEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", cache_dir="fake_dir", threads=2, batch_size=64, @@ -92,7 +92,7 @@ def test_to_dict_with_custom_init_parameters(self): assert embedder_dict == { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": "fake_dir", "threads": 2, "batch_size": 64, @@ -113,7 +113,7 @@ def test_from_dict(self): embedder_dict = { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": None, "threads": None, "batch_size": 32, @@ -125,7 +125,7 @@ def test_from_dict(self): }, } embedder = default_from_dict(FastembedSparseDocumentEmbedder, embedder_dict) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None assert embedder.batch_size == 32 @@ -143,7 +143,7 @@ def 
test_from_dict_with_custom_init_parameters(self): embedder_dict = { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_document_embedder.FastembedSparseDocumentEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": "fake_dir", "threads": 2, "batch_size": 64, @@ -155,7 +155,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = default_from_dict(FastembedSparseDocumentEmbedder, embedder_dict) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 assert embedder.batch_size == 64 @@ -172,11 +172,11 @@ def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( - model_name="prithvida/Splade_PP_en_v1", + model_name="prithivida/Splade_PP_en_v1", cache_dir=None, threads=None, local_files_only=False, @@ -190,7 +190,7 @@ def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -211,7 +211,7 @@ def test_embed(self): """ Test for checking output dimensions and embedding dimensions. 
""" - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") embedder.embedding_backend = MagicMock() embedder.embedding_backend.embed = lambda x, **kwargs: self._generate_mocked_sparse_embedding( # noqa: ARG005 len(x) @@ -235,7 +235,7 @@ def test_embed_incorrect_input_format(self): """ Test for checking incorrect input format when creating embedding. """ - embedder = FastembedSparseDocumentEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseDocumentEmbedder(model="prithivida/Splade_PP_en_v1") string_input = "text" list_integers_input = [1, 2, 3] @@ -330,7 +330,7 @@ def test_run_with_model_kwargs(self): @pytest.mark.integration def test_run(self): embedder = FastembedSparseDocumentEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", ) embedder.warm_up() diff --git a/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py index 4f438fd15..9b73f5f3a 100644 --- a/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py +++ b/integrations/fastembed/tests/test_fastembed_sparse_text_embedder.py @@ -15,8 +15,8 @@ def test_init_default(self): """ Test default initialization parameters for FastembedSparseTextEmbedder. """ - embedder = FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1") - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + embedder = FastembedSparseTextEmbedder(model="prithivida/Splade_PP_en_v1") + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None assert embedder.progress_bar is True @@ -27,13 +27,13 @@ def test_init_with_parameters(self): Test custom initialization parameters for FastembedSparseTextEmbedder. 
""" embedder = FastembedSparseTextEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", cache_dir="fake_dir", threads=2, progress_bar=False, parallel=1, ) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 assert embedder.progress_bar is False @@ -43,12 +43,12 @@ def test_to_dict(self): """ Test serialization of FastembedSparseTextEmbedder to a dictionary, using default initialization parameters. """ - embedder = FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithivida/Splade_PP_en_v1") embedder_dict = embedder.to_dict() assert embedder_dict == { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": None, "threads": None, "progress_bar": True, @@ -63,7 +63,7 @@ def test_to_dict_with_custom_init_parameters(self): Test serialization of FastembedSparseTextEmbedder to a dictionary, using custom initialization parameters. 
""" embedder = FastembedSparseTextEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", cache_dir="fake_dir", threads=2, progress_bar=False, @@ -74,7 +74,7 @@ def test_to_dict_with_custom_init_parameters(self): assert embedder_dict == { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": "fake_dir", "threads": 2, "progress_bar": False, @@ -91,7 +91,7 @@ def test_from_dict(self): embedder_dict = { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": None, "threads": None, "progress_bar": True, @@ -99,7 +99,7 @@ def test_from_dict(self): }, } embedder = default_from_dict(FastembedSparseTextEmbedder, embedder_dict) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir is None assert embedder.threads is None assert embedder.progress_bar is True @@ -112,7 +112,7 @@ def test_from_dict_with_custom_init_parameters(self): embedder_dict = { "type": "haystack_integrations.components.embedders.fastembed.fastembed_sparse_text_embedder.FastembedSparseTextEmbedder", # noqa "init_parameters": { - "model": "prithvida/Splade_PP_en_v1", + "model": "prithivida/Splade_PP_en_v1", "cache_dir": "fake_dir", "threads": 2, "progress_bar": False, @@ -120,7 +120,7 @@ def test_from_dict_with_custom_init_parameters(self): }, } embedder = default_from_dict(FastembedSparseTextEmbedder, embedder_dict) - assert embedder.model_name == "prithvida/Splade_PP_en_v1" + assert embedder.model_name == "prithivida/Splade_PP_en_v1" assert embedder.cache_dir == "fake_dir" assert embedder.threads == 2 assert 
embedder.progress_bar is False @@ -133,11 +133,11 @@ def test_warmup(self, mocked_factory): """ Test for checking embedder instances after warm-up. """ - embedder = FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithivida/Splade_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() mocked_factory.get_embedding_backend.assert_called_once_with( - model_name="prithvida/Splade_PP_en_v1", + model_name="prithivida/Splade_PP_en_v1", cache_dir=None, threads=None, local_files_only=False, @@ -151,7 +151,7 @@ def test_warmup_does_not_reload(self, mocked_factory): """ Test for checking backend instances after multiple warm-ups. """ - embedder = FastembedSparseTextEmbedder(model="prithvida/Splade_PP_en_v1") + embedder = FastembedSparseTextEmbedder(model="prithivida/Splade_PP_en_v1") mocked_factory.get_embedding_backend.assert_not_called() embedder.warm_up() embedder.warm_up() @@ -252,7 +252,7 @@ def test_run_with_model_kwargs(self): @pytest.mark.integration def test_run(self): embedder = FastembedSparseTextEmbedder( - model="prithvida/Splade_PP_en_v1", + model="prithivida/Splade_PP_en_v1", ) embedder.warm_up()