diff --git a/integrations/azure_ai_search/example/document_store.py b/integrations/azure_ai_search/example/document_store.py
new file mode 100644
index 000000000..ac490aa5e
--- /dev/null
+++ b/integrations/azure_ai_search/example/document_store.py
@@ -0,0 +1,39 @@
+from haystack import Document
+from haystack.document_stores.types import DuplicatePolicy
+
+from haystack_integrations.document_stores.azure_ai_search import AzureAISearchDocumentStore
+
+# Declare "version" and "label" as filterable metadata fields on the search index.
+document_store = AzureAISearchDocumentStore(
+    metadata_fields={"version": float, "label": str},
+    index_name="document-store-example",
+)
+
+documents = [
+    Document(
+        content="Use pip to install a basic version of Haystack's latest release: pip install farm-haystack.",
+        meta={"version": 1.15, "label": "first"},
+    ),
+    Document(
+        content="Use pip to install Haystack's latest release: pip install farm-haystack[inference].",
+        meta={"version": 1.22, "label": "second"},
+    ),
+    Document(
+        content="Use pip to install only the Haystack 2.0 code: pip install haystack-ai.",
+        meta={"version": 2.0, "label": "third"},
+    ),
+]
+document_store.write_documents(documents, policy=DuplicatePolicy.SKIP)
+
+# Return documents with version > 1.21 whose label is either "first" or "third".
+filters = {
+    "operator": "AND",
+    "conditions": [
+        {"field": "meta.version", "operator": ">", "value": 1.21},
+        {"field": "meta.label", "operator": "in", "value": ["first", "third"]},
+    ],
+}
+
+results = document_store.filter_documents(filters)
+for doc in results:
+    print(doc)
diff --git a/integrations/azure_ai_search/example/embedding_retrieval.py b/integrations/azure_ai_search/example/embedding_retrieval.py
new file mode 100644
index 000000000..f026d77b8
--- /dev/null
+++ b/integrations/azure_ai_search/example/embedding_retrieval.py
@@ -0,0 +1,40 @@
+from haystack import Document, Pipeline
+from haystack.components.embedders import SentenceTransformersDocumentEmbedder, SentenceTransformersTextEmbedder
+from haystack.document_stores.types import DuplicatePolicy
+
+from haystack_integrations.components.retrievers.azure_ai_search import AzureAISearchEmbeddingRetriever
+from haystack_integrations.document_stores.azure_ai_search import AzureAISearchDocumentStore
+
+document_store = AzureAISearchDocumentStore()
+
+model = "sentence-transformers/all-mpnet-base-v2"
+
+documents = [
+    Document(content="There are over 7,000 languages spoken around the world today."),
+    Document(
+        content="""Elephants have been observed to behave in a way that indicates a
+        high level of self-awareness, such as recognizing themselves in mirrors."""
+    ),
+    Document(
+        content="""In certain parts of the world, like the Maldives, Puerto Rico, and
+        San Diego, you can witness the phenomenon of bioluminescent waves."""
+    ),
+]
+
+# Embed the documents and write them to the document store.
+document_embedder = SentenceTransformersDocumentEmbedder(model=model)
+document_embedder.warm_up()
+documents_with_embeddings = document_embedder.run(documents)
+document_store.write_documents(documents_with_embeddings.get("documents"), policy=DuplicatePolicy.SKIP)
+
+# Build a query pipeline that embeds the query text and retrieves the most similar documents.
+query_pipeline = Pipeline()
+query_pipeline.add_component("text_embedder", SentenceTransformersTextEmbedder(model=model))
+query_pipeline.add_component("retriever", AzureAISearchEmbeddingRetriever(document_store=document_store))
+query_pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
+
+query = "How many languages are there?"
+
+result = query_pipeline.run({"text_embedder": {"text": query}})
+
+print(result["retriever"]["documents"][0])
diff --git a/integrations/azure_ai_search/pyproject.toml b/integrations/azure_ai_search/pyproject.toml
index 08c44c3c9..c90ebfc5d 100644
--- a/integrations/azure_ai_search/pyproject.toml
+++ b/integrations/azure_ai_search/pyproject.toml
@@ -7,7 +7,7 @@ name = "azure-ai-search-haystack"
 dynamic = ["version"]
 description = 'Haystack 2.x Document Store for Azure AI Search'
 readme = "README.md"
-requires-python = ">=3.8"
+requires-python = ">=3.8,<3.13"
 license = "Apache-2.0"
 keywords = []
 authors = [{ name = "deepset", email = "info@deepset.ai" }]
@@ -22,7 +22,7 @@ classifiers = [
   "Programming Language :: Python :: Implementation :: CPython",
   "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.0", "azure-search-documents>=11.5", "azure-identity"]
+dependencies = ["haystack-ai", "azure-search-documents>=11.5", "azure-identity", "torch>=1.11.0"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/azure_ai_search#readme"
@@ -130,6 +130,7 @@ unfixable = [
   # Don't touch unused imports
   "F401",
 ]
+exclude = ["example"]
 
 [tool.ruff.lint.isort]
 known-first-party = ["src"]
@@ -140,6 +141,7 @@ ban-relative-imports = "parents"
 [tool.ruff.lint.per-file-ignores]
 # Tests can use magic values, assertions, and relative imports
 "tests/**/*" = ["PLR2004", "S101", "TID252", "S311"]
+"example/**/*" = ["T201"]
 
 [tool.coverage.run]
 source = ["haystack_integrations"]
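Note on running the new example scripts: both expect a reachable Azure AI Search service, and embedding_retrieval.py downloads the sentence-transformers model on first run (hence the torch dependency above). The sketch below is a minimal pre-flight check, not part of the patch; it assumes the document store picks up its connection settings from the AZURE_SEARCH_SERVICE_ENDPOINT and AZURE_SEARCH_API_KEY environment variables. Adjust the names if your service is configured differently, for example when authenticating through azure-identity instead of an API key.

# Minimal pre-flight check for the example scripts (sketch, not part of the patch).
# Assumption: AzureAISearchDocumentStore reads its endpoint and API key from the
# AZURE_SEARCH_SERVICE_ENDPOINT and AZURE_SEARCH_API_KEY environment variables.
import os
import sys

required = ("AZURE_SEARCH_SERVICE_ENDPOINT", "AZURE_SEARCH_API_KEY")
missing = [name for name in required if not os.environ.get(name)]
if missing:
    sys.exit(f"Set these environment variables before running the examples: {', '.join(missing)}")
print("Azure AI Search settings found; try example/document_store.py or example/embedding_retrieval.py.")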