Skip to content

Commit

Permalink
Run integration tests in CI
Browse files Browse the repository at this point in the history
  • Loading branch information
maxjakob committed Mar 28, 2024
1 parent 8a30585 commit e4a73ff
Show file tree
Hide file tree
Showing 4 changed files with 136 additions and 57 deletions.
57 changes: 57 additions & 0 deletions .github/workflows/_integration_test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
name: integration-test

on:
  workflow_call:
    inputs:
      working-directory:
        required: true
        type: string
        description: "From which folder this pipeline executes"

env:
  POETRY_VERSION: "1.7.1"
  DOCKER_COMPOSE_YAML: "libs/elasticsearch/integration_tests/docker-compose.yml"

jobs:
  build:
    defaults:
      run:
        working-directory: ${{ inputs.working-directory }}
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version:
          - "3.8"
          - "3.9"
          - "3.10"
          - "3.11"
    name: "Integration tests"
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
        uses: "./.github/actions/poetry_setup"
        with:
          python-version: ${{ matrix.python-version }}
          poetry-version: ${{ env.POETRY_VERSION }}
          working-directory: ${{ inputs.working-directory }}
          cache-key: integration-test

      - name: Install dependencies
        shell: bash
        run: poetry install --with=test_integration,test

      - name: Start containers
        shell: bash
        # Options come before service names, per `docker-compose up [OPTIONS] [SERVICE...]`.
        run: docker-compose -f "$DOCKER_COMPOSE_YAML" up -d --build elasticsearch

      - name: Run integration tests
        shell: bash
        env:
          OPENAI_API_KEY: ${{ secrets.SuperSecret }}
        run: make integration_test

      - name: Stop containers
        if: always()  # tear down even when the test step fails
        shell: bash
        # `down` takes no service argument in Compose v1 (and only gained one in
        # recent v2 releases) — bringing the whole project down is the portable form.
        run: docker-compose -f "$DOCKER_COMPOSE_YAML" down
39 changes: 37 additions & 2 deletions libs/elasticsearch/tests/integration_tests/_test_utilities.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import os
from typing import Any, Dict, List
import time
from typing import Any, Dict, List, Optional

from elastic_transport import Transport
from elasticsearch import Elasticsearch
from elasticsearch import Elasticsearch, NotFoundError

from langchain_elasticsearch._utilities import check_if_model_deployed


def clear_test_indices(es: Elasticsearch) -> None:
Expand Down Expand Up @@ -40,3 +43,35 @@ def perform_request(self, *args, **kwargs): # type: ignore
es = Elasticsearch(hosts=[es_url], transport_class=CustomTransport)

return es


def deploy_model(
    es_client: Elasticsearch,
    model_id: str = ".elser_model_2",
    field: str = "text_field",
    model_type: Optional[str] = None,
    inference_config: Optional[Dict] = None,
    poll_interval: float = 1.0,
) -> None:
    """Ensure the trained model ``model_id`` is deployed in Elasticsearch.

    If the model is already deployed this is a no-op. Otherwise the model is
    downloaded, the call blocks until the model definition is fully stored in
    the cluster, and a deployment is started.

    Args:
        es_client: Client connected to the target Elasticsearch cluster.
        model_id: ID of the trained model (defaults to ELSER v2).
        field: Name of the input field the model expects.
        model_type: Optional model type forwarded to the put-trained-model API.
        inference_config: Optional inference configuration for the model.
        poll_interval: Seconds to sleep between readiness checks.
    """
    try:
        check_if_model_deployed(es_client, model_id)
    except NotFoundError:
        # Model is not deployed yet: trigger the download.
        es_client.ml.put_trained_model(
            model_id=model_id,
            input={"field_names": [field]},
            model_type=model_type,
            inference_config=inference_config,
        )

        # Block until the model definition is fully stored.
        # NOTE(review): there is no timeout — a download that never completes
        # would hang the test suite here.
        while True:
            status = es_client.ml.get_trained_models(
                model_id=model_id, include="definition_status"
            )
            if status["trained_model_configs"][0]["fully_defined"]:
                # Model is ready; stop polling.
                break
            time.sleep(poll_interval)

        # Definition is complete — start serving the model.
        es_client.ml.start_trained_model_deployment(model_id=model_id)
76 changes: 33 additions & 43 deletions libs/elasticsearch/tests/integration_tests/test_embeddings.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,38 @@
"""Test elasticsearch_embeddings embeddings."""

import pytest
from langchain_core.utils import get_from_env
import os

from elasticsearch import Elasticsearch

from langchain_elasticsearch.embeddings import ElasticsearchEmbeddings

# deployed with
# https://www.elastic.co/guide/en/machine-learning/current/ml-nlp-text-emb-vector-search-example.html
DEFAULT_MODEL = "sentence-transformers__msmarco-minilm-l-12-v3"
DEFAULT_NUM_DIMENSIONS = "384"


@pytest.fixture
def model_id() -> str:
return get_from_env("model_id", "MODEL_ID", DEFAULT_MODEL)


@pytest.fixture
def expected_num_dimensions() -> int:
return int(
get_from_env(
"expected_num_dimensions", "EXPECTED_NUM_DIMENSIONS", DEFAULT_NUM_DIMENSIONS
)
)


def test_elasticsearch_embedding_documents(
model_id: str, expected_num_dimensions: int
) -> None:
"""Test Elasticsearch embedding documents."""
documents = ["foo bar", "bar foo", "foo"]
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
output = embedding.embed_documents(documents)
assert len(output) == 3
assert len(output[0]) == expected_num_dimensions
assert len(output[1]) == expected_num_dimensions
assert len(output[2]) == expected_num_dimensions


def test_elasticsearch_embedding_query(
model_id: str, expected_num_dimensions: int
) -> None:
"""Test Elasticsearch embedding query."""
document = "foo bar"
embedding = ElasticsearchEmbeddings.from_credentials(model_id)
output = embedding.embed_query(document)
assert len(output) == expected_num_dimensions
from ._test_utilities import deploy_model

# Client for the local test cluster; the ES_URL env var overrides the default host.
ES_CLIENT = Elasticsearch(hosts=[os.environ.get("ES_URL", "http://localhost:9200")])
# Presumably ELSER v2, Elastic's built-in sparse embedding model — inferred from the ID.
MODEL_ID = ".elser_model_2"


class TestEmbeddings:
    """Integration tests for ElasticsearchEmbeddings using an in-stack model."""

    @classmethod
    def setup_class(cls) -> None:
        # Make sure the model is downloaded and deployed before any test runs.
        deploy_model(ES_CLIENT, MODEL_ID)

    def test_elasticsearch_embedding_documents(self) -> None:
        """Test Elasticsearch embedding documents."""
        texts = ["foo bar", "bar foo", "foo"]
        embedder = ElasticsearchEmbeddings(ES_CLIENT.ml, MODEL_ID)
        vectors = embedder.embed_documents(texts)
        assert len(vectors) == 3
        first, second, third = vectors
        assert "foo" in first
        assert "##bar" in first
        assert "bar" in second
        assert "foo" in second
        assert "foo" in third

    def test_elasticsearch_embedding_query(self) -> None:
        """Test Elasticsearch embedding query."""
        query_text = "foo bar"
        embedder = ElasticsearchEmbeddings(ES_CLIENT.ml, MODEL_ID)
        result = embedder.embed_query(query_text)
        assert "foo" in result
        assert "##bar" in result
21 changes: 9 additions & 12 deletions libs/elasticsearch/tests/integration_tests/test_vectorstores.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
ConsistentFakeEmbeddings,
FakeEmbeddings,
)
from ._test_utilities import clear_test_indices, requests_saving_es_client
from ._test_utilities import clear_test_indices, deploy_model, requests_saving_es_client

logging.basicConfig(level=logging.DEBUG)

Expand All @@ -40,17 +40,11 @@
"""

modelsDeployed: List[str] = [
# ".elser_model_1",
# "sentence-transformers__all-minilm-l6-v2",
]


class TestElasticsearch:
@classmethod
def setup_class(cls) -> None:
if not os.getenv("OPENAI_API_KEY"):
raise ValueError("OPENAI_API_KEY environment variable is not set")

@pytest.fixture(scope="class", autouse=True)
def elasticsearch_connection(self) -> Union[dict, Generator[dict, None, None]]:
es_url = os.environ.get("ES_URL", "http://localhost:9200")
Expand Down Expand Up @@ -708,20 +702,23 @@ def assert_query(query_body: dict, query: str) -> dict:
output = docsearch.similarity_search("bar", k=1)
assert output == [Document(page_content="bar")]

@pytest.mark.skipif(
".elser_model_1" not in modelsDeployed,
reason="ELSER not deployed in ML Node, skipping test",
)
def test_similarity_search_with_sparse_infer_instack(
    self, elasticsearch_connection: dict, index_name: str
) -> None:
    """test end to end with sparse retrieval strategy and inference in-stack"""
    # ELSER v2 sparse-embedding model ID; deployed on demand below.
    model_id = ".elser_model_2"

    es_client = ElasticsearchStore.connect_to_elasticsearch(
        **elasticsearch_connection
    )
    # Download/deploy the model if it is not already running (no-op otherwise).
    deploy_model(es_client, model_id)

    texts = ["foo", "bar", "baz"]
    # In-stack inference: embeddings are computed by the deployed model inside
    # Elasticsearch rather than client-side.
    docsearch = ElasticsearchStore.from_texts(
        texts,
        **elasticsearch_connection,
        index_name=index_name,
        strategy=ElasticsearchStore.SparseVectorRetrievalStrategy(model_id),
    )
    output = docsearch.similarity_search("foo", k=1)
    assert output == [Document(page_content="foo")]
Expand Down

0 comments on commit e4a73ff

Please sign in to comment.