From 147a3f42c1073f14e2163515d9d0e19c0c0e25ff Mon Sep 17 00:00:00 2001 From: anakin87 Date: Sun, 21 Jan 2024 18:09:15 +0100 Subject: [PATCH] more test cases --- .../pgvector/document_store.py | 25 +++++-------------- .../pgvector/tests/test_document_store.py | 22 +++++++++++++++- 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py index d95c0e7fa..afcbfd83f 100644 --- a/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py +++ b/integrations/pgvector/src/haystack_integrations/document_stores/pgvector/document_store.py @@ -322,27 +322,15 @@ def _from_haystack_to_pg_documents(self, documents: List[Document]) -> List[Dict db_documents = [] for document in documents: - db_document = document.to_dict(flatten=False) - db_document.pop("score") - db_document.pop("blob") + db_document = {k: v for k, v in document.to_dict(flatten=False).items() if k not in ["score", "blob"]} blob = document.blob + db_document["blob_data"] = blob.data if blob else None + db_document["blob_meta"] = Json(blob.meta) if blob and blob.meta else None + db_document["blob_mime_type"] = blob.mime_type if blob and blob.mime_type else None - blob_data, blob_meta, blob_mime_type = None, None, None - - if blob: - blob_data = blob.data - if blob.meta: - blob_meta = blob.meta - if blob.mime_type: - blob_mime_type = blob.mime_type - - db_document["blob_data"] = blob_data - db_document["blob_meta"] = Json(blob_meta) - db_document["blob_mime_type"] = blob_mime_type - - db_document["dataframe"] = Json(document.dataframe) if document.dataframe else None - db_document["meta"] = Json(document.meta) + db_document["dataframe"] = Json(db_document["dataframe"]) if db_document["dataframe"] else None + db_document["meta"] = Json(db_document["meta"]) db_documents.append(db_document) @@ -362,7 +350,6 @@ def _from_pg_to_haystack_documents(self, documents: List[Dict[str, Any]]) -> Lis haystack_document = Document.from_dict(haystack_dict) - blob = None if blob_data: blob = ByteStream(data=blob_data, meta=blob_meta, mime_type=blob_mime_type) haystack_document.blob = blob diff --git a/integrations/pgvector/tests/test_document_store.py b/integrations/pgvector/tests/test_document_store.py index 410253960..0b44bdce3 100644 --- a/integrations/pgvector/tests/test_document_store.py +++ b/integrations/pgvector/tests/test_document_store.py @@ -4,11 +4,12 @@ import pytest -from haystack.dataclasses.document import Document +from haystack.dataclasses.document import ByteStream, Document from haystack.document_stores.errors import DuplicateDocumentError from haystack.document_stores.types import DuplicatePolicy from haystack.testing.document_store import CountDocumentsTest, DeleteDocumentsTest, WriteDocumentsTest from haystack_integrations.document_stores.pgvector import PgvectorDocumentStore +from pandas import DataFrame class TestDocumentStore(CountDocumentsTest, WriteDocumentsTest, DeleteDocumentsTest): @@ -39,6 +40,25 @@ def test_write_documents(self, document_store: PgvectorDocumentStore): with pytest.raises(DuplicateDocumentError): document_store.write_documents(docs, DuplicatePolicy.FAIL) + def test_write_blob(self, document_store: PgvectorDocumentStore): + bytestream = ByteStream(b"test", meta={"meta_key": "meta_value"}, mime_type="mime_type") + docs = [Document(id="1", blob=bytestream)] + document_store.write_documents(docs) + + # TODO: update when filters are implemented + retrieved_docs = document_store.filter_documents() + assert retrieved_docs == docs + + def test_write_dataframe(self, document_store: PgvectorDocumentStore): + dataframe = DataFrame({"col1": [1, 2], "col2": [3, 4]}) + docs = [Document(id="1", dataframe=dataframe)] + + document_store.write_documents(docs) + + # TODO: update when filters are implemented + retrieved_docs = document_store.filter_documents() + assert retrieved_docs == docs + def test_init(self): document_store = PgvectorDocumentStore( connection_string="postgresql://postgres:postgres@localhost:5432/postgres",