deepset-ai · anakin87 · Jan 10, 2025 · Jan 6, 2025 · Jan 9, 2025 · Jan 9, 2025
@@ -22,7 +22,7 @@ pip install pgvector-haystack
 
 Ensure that you have a PostgreSQL running with the `pgvector` extension. For a quick setup using Docker, run:
 ```
-docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector
+docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg17
 ```
 
 then run the tests:

@@ -389,7 +389,9 @@ def _handle_hnsw(self):
             )
             return
 
-        sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name))
+        sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format(
+            schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name)
+        )
         self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index")
 
         self._create_hnsw_index()

@@ -5,6 +5,7 @@
 from unittest.mock import patch
 
 import numpy as np
+import psycopg
 import pytest
 from haystack.dataclasses.document import ByteStream, Document
 from haystack.document_stores.errors import DuplicateDocumentError
@@ -259,3 +260,47 @@ def test_from_pg_to_haystack_documents():
     assert haystack_docs[2].meta == {"meta_key": "meta_value"}
     assert haystack_docs[2].embedding == [0.7, 0.8, 0.9]
     assert haystack_docs[2].score is None
+
+
+@pytest.mark.integration
+def test_hnsw_index_recreation():
+    """Check that `hnsw_recreate_index_if_exists=True` recreates the HNSW index inside a non-default schema."""
+
+    def get_index_oid(document_store, schema_name, index_name):
+        sql_get_index_oid = """
+            SELECT c.oid
+            FROM pg_class c
+            JOIN pg_namespace n ON n.oid = c.relnamespace
+            WHERE c.relkind = 'i'
+            AND n.nspname = %s
+            AND c.relname = %s;
+        """
+        return document_store.cursor.execute(sql_get_index_oid, (schema_name, index_name)).fetchone()[0]
+
+    # create a new schema, so that the index does not live in the default one
+    connection_string = "postgresql://postgres:postgres@localhost:5432/postgres"
+    schema_name = "test_schema"
+    with psycopg.connect(connection_string, autocommit=True) as conn:
+        conn.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")
+
+    # create a first document store and trigger the creation of the hnsw index
+    params = {
+        "connection_string": Secret.from_token(connection_string),
+        "schema_name": schema_name,
+        "table_name": "haystack_test_hnsw_index_recreation",
+        "search_strategy": "hnsw",
+    }
+    ds1 = PgvectorDocumentStore(**params)
+    ds1._initialize_table()
+
+    # get the hnsw index oid before the recreation
+    hnsw_index_name = "haystack_hnsw_index"
+    first_oid = get_index_oid(ds1, ds1.schema_name, hnsw_index_name)
+
+    # create a second document store with recreation enabled, then read the index oid again
+    ds2 = PgvectorDocumentStore(**params, hnsw_recreate_index_if_exists=True)
+    ds2._initialize_table()
+    second_oid = get_index_oid(ds2, ds2.schema_name, hnsw_index_name)
+
+    # verify that oids differ: an unchanged oid means the existing index was kept
+    assert second_oid != first_oid, "Index was not recreated (OID remained the same)"