Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: PgvectorDocumentStore - use appropriate schema name if dropping index #1277

Merged
merged 4 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion integrations/pgvector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ pip install pgvector-haystack

Ensure that you have a PostgreSQL instance running with the `pgvector` extension. For a quick setup using Docker, run:
```
docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres ankane/pgvector
docker run -d -p 5432:5432 -e POSTGRES_USER=postgres -e POSTGRES_PASSWORD=postgres -e POSTGRES_DB=postgres pgvector/pgvector:pg17
```

then run the tests:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,9 @@ def _handle_hnsw(self):
)
return

sql_drop_index = SQL("DROP INDEX IF EXISTS {index_name}").format(index_name=Identifier(self.hnsw_index_name))
sql_drop_index = SQL("DROP INDEX IF EXISTS {schema_name}.{index_name}").format(
schema_name=Identifier(self.schema_name), index_name=Identifier(self.hnsw_index_name)
)
self._execute_sql(sql_drop_index, error_msg="Could not drop HNSW index")

self._create_hnsw_index()
Expand Down
45 changes: 45 additions & 0 deletions integrations/pgvector/tests/test_document_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from unittest.mock import patch

import numpy as np
import psycopg
import pytest
from haystack.dataclasses.document import ByteStream, Document
from haystack.document_stores.errors import DuplicateDocumentError
Expand Down Expand Up @@ -259,3 +260,47 @@ def test_from_pg_to_haystack_documents():
assert haystack_docs[2].meta == {"meta_key": "meta_value"}
assert haystack_docs[2].embedding == [0.7, 0.8, 0.9]
assert haystack_docs[2].score is None


@pytest.mark.integration
def test_hnsw_index_recreation():
    """Check that the HNSW index is rebuilt inside a non-default schema.

    The test connects to the PostgreSQL server at ``localhost:5432``, makes
    sure the ``test_schema`` schema exists, and initializes the table of a
    first document store that uses the ``hnsw`` search strategy. It then
    initializes a second store with the same parameters plus
    ``hnsw_recreate_index_if_exists=True`` and compares the index OID read
    from ``pg_class`` before and after: a different OID means the index was
    dropped and created again.
    """

    def _lookup_index_oid(document_store, schema_name, index_name):
        # Look the index up in the system catalogs, restricted to the given
        # schema so that an index with the same name elsewhere is ignored.
        query = (
            "\n"
            "SELECT c.oid\n"
            "FROM pg_class c\n"
            "JOIN pg_namespace n ON n.oid = c.relnamespace\n"
            "WHERE c.relkind = 'i'\n"
            "AND n.nspname = %s\n"
            "AND c.relname = %s;\n"
        )
        cursor = document_store.cursor
        row = cursor.execute(query, (schema_name, index_name)).fetchone()
        return row[0]

    connection_string = "postgresql://postgres:postgres@localhost:5432/postgres"
    schema_name = "test_schema"
    hnsw_index_name = "haystack_hnsw_index"

    # The schema has to exist before a document store can create its table in it.
    with psycopg.connect(connection_string, autocommit=True) as conn:
        conn.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_name}")

    store_kwargs = {
        "connection_string": Secret.from_token(connection_string),
        "schema_name": schema_name,
        "table_name": "haystack_test_hnsw_index_recreation",
        "search_strategy": "hnsw",
    }

    # First store: initializing the table triggers the creation of the HNSW index.
    original_store = PgvectorDocumentStore(**store_kwargs)
    original_store._initialize_table()
    oid_before = _lookup_index_oid(original_store, original_store.schema_name, hnsw_index_name)

    # Second store: identical settings, but with index recreation enabled.
    recreating_store = PgvectorDocumentStore(**store_kwargs, hnsw_recreate_index_if_exists=True)
    recreating_store._initialize_table()
    oid_after = _lookup_index_oid(recreating_store, recreating_store.schema_name, hnsw_index_name)

    # A recreated index is a new catalog entry, hence a new OID.
    assert oid_after != oid_before, "Index was not recreated (OID remained the same)"
Loading