Skip to content

Commit

Permalink
[feature/semantic-text] Add query and model compatibility tests (#107256)
Browse files Browse the repository at this point in the history
  • Loading branch information
carlosdelest authored Apr 9, 2024
1 parent 81c864c commit 4d24bf0
Show file tree
Hide file tree
Showing 4 changed files with 299 additions and 10 deletions.
2 changes: 1 addition & 1 deletion x-pack/plugin/inference/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test'

restResources {
restApi {
include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex'
include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search'
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,15 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio
try {
conflicts.check();
} catch (Exception exc) {
throw new DocumentParsingException(xContentLocation, "Incompatible model_settings", exc);
throw new DocumentParsingException(
xContentLocation,
"Incompatible model settings for field ["
+ name()
+ "]. Check that the "
+ INFERENCE_ID_FIELD.getPreferredName()
+ " is not using different model settings",
exc
);
}
mapper = this;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ setup:
"task_settings": {
}
}
- do:
inference.put_model:
task_type: text_embedding
Expand Down Expand Up @@ -50,19 +51,109 @@ setup:
type: text

---
"Sparse vector results format":
"Dense vector results are indexed as nested chunks and searchable":
- do:
bulk:
index: test-index
refresh: true
body: |
{"index":{}}
{"dense_field": ["you know, for testing", "now with chunks"]}
{"index":{}}
{"dense_field": ["some more tests", "that include chunks"]}
- do:
index:
search:
index: test-index
id: doc_1
body:
sparse_field: "you know, for testing"
query:
nested:
path: dense_field.inference.chunks
query:
knn:
field: dense_field.inference.chunks.embeddings
query_vector_builder:
text_embedding:
model_id: dense-inference-id
model_text: "you know, for testing"

- match: { hits.total.value: 2 }
- match: { hits.total.relation: eq }

# Search with inner hits
- do:
search:
_source: false
index: test-index
body:
query:
nested:
path: dense_field.inference.chunks
inner_hits:
_source: false
fields: [dense_field.inference.chunks.text]
query:
knn:
field: dense_field.inference.chunks.embeddings
query_vector_builder:
text_embedding:
model_id: dense-inference-id
model_text: "you know, for testing"

- match: { hits.total.value: 2 }
- match: { hits.total.relation: eq }
- match: { hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.total.value: 2 }
- exists: hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.hits.0.fields.dense_field\.inference\.chunks.0.text
- exists: hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.hits.1.fields.dense_field\.inference\.chunks.0.text


---
"Dense vector results format":
"Sparse vector results are indexed as nested chunks and searchable":
- do:
index:
bulk:
index: test-index
refresh: true
body: |
{"index":{}}
{"sparse_field": ["you know, for testing", "now with chunks"]}
{"index":{}}
{"sparse_field": ["some more tests", "that include chunks"]}
- do:
search:
index: test-index
body:
query:
nested:
path: sparse_field.inference.chunks
query:
text_expansion:
sparse_field.inference.chunks.embeddings:
model_id: sparse-inference-id
model_text: "you know, for testing"

# Search with inner hits
- do:
search:
_source: false
index: test-index
id: doc_1
body:
dense_field: "you know, for testing"
query:
nested:
path: sparse_field.inference.chunks
inner_hits:
_source: false
fields: [sparse_field.inference.chunks.text]
query:
text_expansion:
sparse_field.inference.chunks.embeddings:
model_id: sparse-inference-id
model_text: "you know, for testing"

- match: { hits.total.value: 2 }
- match: { hits.total.relation: eq }
- match: { hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.total.value: 2 }
- exists: hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.hits.0.fields.sparse_field\.inference\.chunks.0.text
- exists: hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.hits.1.fields.sparse_field\.inference\.chunks.0.text


Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Shared fixture for the model-compatibility tests in this file: registers one
# sparse and one dense inference endpoint, creates an index with a semantic_text
# field bound to each endpoint, and indexes a first document.
setup:
# Skip on versions up to and including 8.12.99.
- skip:
version: " - 8.12.99"
reason: semantic_text introduced in 8.13.0 # TODO change when 8.13.0 is released

# Register the sparse_embedding endpoint "sparse-inference-id" backed by test_service.
- do:
inference.put_model:
task_type: sparse_embedding
inference_id: sparse-inference-id
body: >
{
"service": "test_service",
"service_settings": {
"model": "my_model",
"api_key": "abc64"
},
"task_settings": {
}
}
# Register the text_embedding endpoint "dense-inference-id" backed by
# text_embedding_test_service, with 10 dimensions and cosine similarity.
# The tests below re-register endpoints with settings that differ from these.
- do:
inference.put_model:
task_type: text_embedding
inference_id: dense-inference-id
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 10,
"similarity": "cosine",
"api_key": "abc64"
},
"task_settings": {
}
}
# Create test-index with two semantic_text fields, one per inference endpoint.
- do:
indices.create:
index: test-index
body:
mappings:
properties:
sparse_field:
type: semantic_text
inference_id: sparse-inference-id
dense_field:
type: semantic_text
inference_id: dense-inference-id

# Index a doc to set mappings internally
- do:
index:
index: test-index
id: doc_1
body:
dense_field: "inference test"
sparse_field: "another inference test"

---
# Re-registers dense-inference-id with dimensions 20 (setup used 10) and expects
# indexing into dense_field to be rejected with an incompatible-settings error.
"Fails for non-compatible dimensions":

# Remove the dense endpoint created in setup so the same id can be re-registered.
- do:
inference.delete_model:
task_type: text_embedding
inference_id: dense-inference-id

# Same service and similarity as setup; only "dimensions" differs (20 instead of 10).
- do:
inference.put_model:
task_type: text_embedding
inference_id: dense-inference-id
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 20,
"similarity": "cosine",
"api_key": "abc64"
},
"task_settings": {
}
}
# Indexing doc_2 must fail; the error has to match the regex naming dense_field.
- do:
catch: /Incompatible model settings for field \[dense_field\].+/
index:
index: test-index
id: doc_2
body:
dense_field: "some other test"

---
# Re-registers dense-inference-id with similarity dot_product (setup used cosine)
# and expects indexing into dense_field to be rejected with an
# incompatible-settings error.
"Fails for non-compatible similarity":

# Remove the dense endpoint created in setup so the same id can be re-registered.
- do:
inference.delete_model:
task_type: text_embedding
inference_id: dense-inference-id

# Same service and dimensions as setup; only "similarity" differs.
- do:
inference.put_model:
task_type: text_embedding
inference_id: dense-inference-id
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 10,
"similarity": "dot_product",
"api_key": "abc64"
},
"task_settings": {
}
}
# Indexing doc_2 must fail; the error has to match the regex naming dense_field.
- do:
catch: /Incompatible model settings for field \[dense_field\].+/
index:
index: test-index
id: doc_2
body:
dense_field: "some other test"

---
# Re-registers dense-inference-id as a sparse_embedding endpoint (setup registered
# it as text_embedding) and expects indexing into dense_field to be rejected.
"Fails for non-compatible task type for dense vectors":

# Remove the text_embedding endpoint created in setup.
- do:
inference.delete_model:
task_type: text_embedding
inference_id: dense-inference-id


# Reuse the id "dense-inference-id" with task type sparse_embedding on test_service.
- do:
inference.put_model:
task_type: sparse_embedding
inference_id: dense-inference-id
body: >
{
"service": "test_service",
"service_settings": {
"model": "my_model",
"api_key": "abc64"
},
"task_settings": {
}
}
# Indexing doc_2 must fail; the error has to match the regex naming dense_field.
- do:
catch: /Incompatible model settings for field \[dense_field\].+/
index:
index: test-index
id: doc_2
body:
dense_field: "some other test"

---
# Re-registers sparse-inference-id as a text_embedding endpoint (setup registered
# it as sparse_embedding) and expects indexing into sparse_field to be rejected.
"Fails for non-compatible task type for sparse vectors":

# Remove the sparse_embedding endpoint created in setup.
- do:
inference.delete_model:
task_type: sparse_embedding
inference_id: sparse-inference-id

# Reuse the id "sparse-inference-id" with task type text_embedding on
# text_embedding_test_service.
- do:
inference.put_model:
task_type: text_embedding
inference_id: sparse-inference-id
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 10,
"similarity": "cosine",
"api_key": "abc64"
},
"task_settings": {
}
}
# Indexing doc_2 must fail; the error has to match the regex naming sparse_field.
- do:
catch: /Incompatible model settings for field \[sparse_field\].+/
index:
index: test-index
id: doc_2
body:
sparse_field: "some other test"


0 comments on commit 4d24bf0

Please sign in to comment.