diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index 87655a5ec..648ae88af 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -389,7 +389,9 @@ def _handle_hnsw(self): ) return - sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name)) + sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format( + schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name) + ) self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index") self._create_hnsw_index() diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index baa921137..fadf16876 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -2,9 +2,13 @@ # # SPDX-License-Identifier: Apache-2.0 +import os +import random +import string from unittest.mock import patch import numpy as np +import psycopg import pytest from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DuplicateDocumentError @@ -259,3 +263,41 @@ def test_from_pg_to_haystack_documents(): assert haystack_docs[2].meta == {"meta_key": "meta_value"} assert haystack_docs[2].embedding == [0.7, 0.8, 0.9] assert haystack_docs[2].score is None + + +@pytest.mark.integration +def test_hnsw_index_recreation_in_new_schema(): + # Set your Postgres connection string (or set PG_CONN_STR in your environment directly). + os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres" + + table_name = "test_table" + index_name = f"{table_name}_index" + schema_name = "".join(random.choices(string.ascii_letters, k=8)).lower() # noqa: S311 + embedding_dimension = 1024 + + # Create the new schema if it doesn't exist. + with psycopg.connect(os.environ["PG_CONN_STR"]) as connection: + with connection.cursor() as cursor: + cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name};") + connection.commit() + + # Instantiate the document store in the new schema with HNSW indexing. + document_store = PgvectorDocumentStore( + embedding_dimension=embedding_dimension, + schema_name=schema_name, + vector_function="cosine_similarity", + recreate_table=False, + search_strategy="hnsw", + table_name=table_name, + hnsw_index_name=index_name, + hnsw_recreate_index_if_exists=True, # This ensures we drop/re-create the index if it exists + keyword_index_name=f"{table_name}_keyword_index", + ) + + # First write documents + docs1 = [Document(content="Test Content 1", embedding=[0.8] * embedding_dimension)] + document_store.write_documents(docs1) + + # Second write documents + docs2 = [Document(content="Test Content 2", embedding=[0.7] * embedding_dimension)] + document_store.write_documents(docs2)