deepset-ai · mabartcz · Jan 6, 2025 · Jan 9, 2025 · Jan 9, 2025 · anakin87
@@ -389,9 +389,23 @@ def _handle_hnsw(self):
             )
             return
 
-        sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name))
+        sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format(
+            schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name)
+        )
         self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index")
 
+        index_exists_after_deletion = bool(
+            self._execute_sql(
+                "SELECT 1 FROM pg_indexes WHERE schemaname = %s AND tablename = %s AND indexname = %s",
+                (self.schema_name, self.table_name, self.hnsw_index_name),
+                "Could not check if HNSW index exists",
+            ).fetchone()
+        )
+
+        if index_exists and index_exists_after_deletion:
+            error_message = "The HNSW was not dropped"
+            raise DocumentStoreError(error_message)
+
         self._create_hnsw_index()
 
     def _create_hnsw_index(self):

@@ -2,9 +2,13 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import os
+import random
+import string
 from unittest.mock import patch
 
 import numpy as np
+import psycopg
 import pytest
 from haystack.dataclasses.document import ByteStream, Document
 from haystack.document_stores.errors import DuplicateDocumentError
@@ -259,3 +263,41 @@ def test_from_pg_to_haystack_documents():
     assert haystack_docs[2].meta == {"meta_key": "meta_value"}
     assert haystack_docs[2].embedding == [0.7, 0.8, 0.9]
     assert haystack_docs[2].score is None
+
+
+@pytest.mark.integration
+def test_hnsw_index_recreation_in_new_schema():
+    # Set your Postgres connection string (or set PG_CONN_STR in your environment directly).
+    os.environ["PG_CONN_STR"] = "postgresql://postgres:postgres@localhost:5432/postgres"
+
+    table_name = "test_table"
+    index_name = f"{table_name}_index"
+    schema_name = "".join(random.choices(string.ascii_letters, k=8)).lower()  # noqa: S311
+    embedding_dimension = 1024
+
+    # Create the new schema if it doesn't exist.
+    with psycopg.connect(os.environ["PG_CONN_STR"]) as connection:
+        with connection.cursor() as cursor:
+            cursor.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name};")
+            connection.commit()
+
+    # Instantiate the document store in the new schema with HNSW indexing.
+    document_store = PgvectorDocumentStore(
+        embedding_dimension=embedding_dimension,
+        schema_name=schema_name,
+        vector_function="cosine_similarity",
+        recreate_table=False,
+        search_strategy="hnsw",
+        table_name=table_name,
+        hnsw_index_name=index_name,
+        hnsw_recreate_index_if_exists=True,  # This ensures we drop/re-create the index if it exists
+        keyword_index_name=f"{table_name}_keyword_index",
+    )
+
+    # First write documents
+    docs1 = [Document(content="Test Content 1", embedding=[0.8] * embedding_dimension)]
+    document_store.write_documents(docs1)
+
+    # Second write documents
+    docs2 = [Document(content="Test Content 2", embedding=[0.7] * embedding_dimension)]
+    document_store.write_documents(docs2)