Skip to content

Commit

Permalink
Enable testing index Weaviate (#790)
Browse files Browse the repository at this point in the history
PR that enables testing the Weaviate indexing component.

Leverage the embedded Weaviate component to set up the client and bypass
the docker-compose setup.

https://weaviate.io/developers/weaviate/installation/embedded
  • Loading branch information
PhilippeMoussalli authored Jan 17, 2024
1 parent d1c4313 commit 48fbbd4
Show file tree
Hide file tree
Showing 5 changed files with 80 additions and 4 deletions.
13 changes: 9 additions & 4 deletions components/index_weaviate/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,14 @@ ARG FONDANT_VERSION=main
RUN pip3 install fondant[component,aws,azure,gcp]@git+https://github.com/ml6team/fondant@${FONDANT_VERSION}

# Set the working directory to the component folder
WORKDIR /component/src
WORKDIR /component
COPY src/ src/

# Copy over src-files
COPY src/ .
FROM base as test
COPY tests/ tests/
RUN pip3 install --no-cache-dir -r tests/requirements.txt
RUN python -m pytest tests

ENTRYPOINT ["fondant", "execute", "main"]
FROM base
WORKDIR /component/src
ENTRYPOINT ["fondant", "execute", "main"]
7 changes: 7 additions & 0 deletions components/index_weaviate/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,10 @@ dataset.write(
)
```

<a id="index_weaviate#testing"></a>
## Testing

You can run the tests using docker with BuildKit. From this directory, run:
```
docker build . --target test
```
2 changes: 2 additions & 0 deletions components/index_weaviate/tests/pytest.ini
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[pytest]
pythonpath = ../src
3 changes: 3 additions & 0 deletions components/index_weaviate/tests/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
numpy==1.24.4
pytest==7.4.2

59 changes: 59 additions & 0 deletions components/index_weaviate/tests/test_component.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import typing as t

import dask.dataframe as dd
import numpy as np
import pandas as pd
import weaviate
from weaviate.embedded import EmbeddedOptions

from src.main import IndexWeaviateComponent


def get_written_objects(
    client: weaviate.Client,
    class_name: str,
    limit: int = 8,
) -> t.List[t.Dict[str, t.Any]]:
    """Read back objects of ``class_name`` together with their id and vector.

    Adapted from
    https://weaviate.io/developers/weaviate/manage-data/read-all-objects.

    Args:
        client: Weaviate client used to build and run the query.
        class_name: Name of the Weaviate class to query.
        limit: Maximum number of objects requested from Weaviate. Defaults
            to 8, the value that used to be hard-coded here.

    Returns:
        The list stored under ``["data"]["Get"][class_name]`` in the query
        response. The query requests the ``id_`` and ``passage`` properties
        and the additional ``id`` and ``vector`` fields for every object.

    Raises:
        KeyError: If the response does not contain the
            ``data`` -> ``Get`` -> ``class_name`` path.
    """
    query = (
        client.query.get(class_name, ["id_", "passage"])
        .with_additional(["id vector"])
        .with_limit(limit)
    )

    response = query.do()

    return response["data"]["Get"][class_name]


def test_index_weaviate_component(monkeypatch):
    """Write a small dataframe through the component and read it back.

    An embedded Weaviate client is created, two rows with a ``text`` and an
    ``embedding`` column are written via ``IndexWeaviateComponent.write``, and
    the stored objects are then compared with the source rows.

    The ``monkeypatch`` fixture is requested but not used by this test.
    """
    client = weaviate.Client(embedded_options=EmbeddedOptions())

    pandas_df = pd.DataFrame(
        [
            ("Lorem ipsum dolor", np.array([1.0, 2.0])),
            ("ligula eget dolor", np.array([2.0, 3.0])),
        ],
        columns=["text", "embedding"],
    )
    dask_df = dd.from_pandas(pandas_df, npartitions=2)

    # NOTE(review): the URL assumes the embedded instance created above
    # listens on port 6666 - confirm against the EmbeddedOptions default of
    # the installed weaviate client version.
    index_component = IndexWeaviateComponent(
        weaviate_url="http://localhost:6666",  # local weaviate instance running on port 6666
        batch_size=10,
        dynamic=True,
        num_workers=2,
        overwrite=True,
        class_name="TestClass",
        vectorizer=None,
    )

    index_component.write(dask_df)
    written_objects = get_written_objects(client, "TestClass")

    # Without this check the loop below would pass vacuously when nothing was
    # written (or returned), since all other assertions live inside it.
    assert len(written_objects) == len(pandas_df)

    for _object in written_objects:
        # "id_" is read back as the dataframe index label of the source row.
        matching_row = pandas_df.loc[int(_object["id_"])]
        assert matching_row.text == _object["passage"]
        assert matching_row.embedding.tolist() == _object["_additional"]["vector"]

0 comments on commit 48fbbd4

Please sign in to comment.