From 937572d00699223e7e5c936d1024ec2daba0e286 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 9 Apr 2024 10:11:40 +0200 Subject: [PATCH 1/3] Add first query tests --- x-pack/plugin/inference/build.gradle | 2 +- .../20_semantic_text_field_mapper.yml | 59 ++++++++++++++++--- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/x-pack/plugin/inference/build.gradle b/x-pack/plugin/inference/build.gradle index 781261c330e78..a2811ffefca91 100644 --- a/x-pack/plugin/inference/build.gradle +++ b/x-pack/plugin/inference/build.gradle @@ -10,7 +10,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test' restResources { restApi { - include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex' + include '_common', 'bulk', 'indices', 'inference', 'index', 'get', 'update', 'reindex', 'search' } } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml index 27f233436b925..b574aeec632ae 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml @@ -17,6 +17,7 @@ setup: "task_settings": { } } + - do: inference.put_model: task_type: text_embedding @@ -50,19 +51,61 @@ setup: type: text --- -"Sparse vector results format": +"Dense vector results are indexed as nested chunks and searchable": + - do: + bulk: + index: test-index + refresh: true + body: | + {"index":{}} + {"dense_field": "you know, for testing"} + {"index":{}} + {"dense_field": "some more tests"} + - do: - index: + search: index: test-index - id: doc_1 body: - sparse_field: "you know, for testing" + query: + nested: + path: dense_field.inference.chunks + query: + knn: + field: dense_field.inference.chunks.embeddings + query_vector_builder: + text_embedding: + model_id: dense-inference-id + model_text: "you know, for testing" + + - match: { hits.total.value: 2 } + - match: { hits.total.relation: eq } --- -"Dense vector results format": +"Sparse vector results are indexed as nested chunks and searchable": - do: - index: + bulk: + index: test-index + refresh: true + body: | + {"index":{}} + {"sparse_field": "you know, for testing"} + {"index":{}} + {"sparse_field": "some more tests"} + + - do: + search: index: test-index - id: doc_1 body: - dense_field: "you know, for testing" + query: + nested: + path: sparse_field.inference.chunks + query: + text_expansion: + sparse_field.inference.chunks.embeddings: + model_id: sparse-inference-id + model_text: "you know, for testing" + + - match: { hits.total.value: 2 } + - match: { hits.total.relation: eq } + + From bef2214c4eb65429711708e5137fe65182fc2b1e Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 9 Apr 2024 10:57:00 +0200 Subject: [PATCH 2/3] Add inner_hits tests --- .../20_semantic_text_field_mapper.yml | 56 +++++++++++++++++-- 1 file changed, 52 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml index b574aeec632ae..df5073cfed525 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/20_semantic_text_field_mapper.yml @@ -58,9 +58,9 @@ setup: refresh: true body: | {"index":{}} - {"dense_field": "you know, for testing"} + {"dense_field": ["you know, for testing", "now with chunks"]} {"index":{}} - {"dense_field": "some more tests"} + {"dense_field": ["some more tests", "that include chunks"]} - do: search: @@ -80,6 +80,33 @@ setup: - match: { hits.total.value: 2 } - match: { hits.total.relation: eq } + # Search with inner hits + - do: + search: + _source: false + index: test-index + body: + query: + nested: + path: dense_field.inference.chunks + inner_hits: + _source: false + fields: [dense_field.inference.chunks.text] + query: + knn: + field: dense_field.inference.chunks.embeddings + query_vector_builder: + text_embedding: + model_id: dense-inference-id + model_text: "you know, for testing" + + - match: { hits.total.value: 2 } + - match: { hits.total.relation: eq } + - match: { hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.total.value: 2 } + - exists: hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.hits.0.fields.dense_field\.inference\.chunks.0.text + - exists: hits.hits.0.inner_hits.dense_field\.inference\.chunks.hits.hits.1.fields.dense_field\.inference\.chunks.0.text + + --- "Sparse vector results are indexed as nested chunks and searchable": - do: @@ -88,17 +115,35 @@ setup: refresh: true body: | {"index":{}} - {"sparse_field": "you know, for testing"} + {"sparse_field": ["you know, for testing", "now with chunks"]} {"index":{}} - {"sparse_field": "some more tests"} + {"sparse_field": ["some more tests", "that include chunks"]} + + - do: + search: + index: test-index + body: + query: + nested: + path: sparse_field.inference.chunks + query: + text_expansion: + sparse_field.inference.chunks.embeddings: + model_id: sparse-inference-id + model_text: "you know, for testing" + # Search with inner hits - do: search: + _source: false index: test-index body: query: nested: path: sparse_field.inference.chunks + inner_hits: + _source: false + fields: [sparse_field.inference.chunks.text] query: text_expansion: sparse_field.inference.chunks.embeddings: @@ -107,5 +152,8 @@ setup: - match: { hits.total.value: 2 } - match: { hits.total.relation: eq } + - match: { hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.total.value: 2 } + - exists: hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.hits.0.fields.sparse_field\.inference\.chunks.0.text + - exists: hits.hits.0.inner_hits.sparse_field\.inference\.chunks.hits.hits.1.fields.sparse_field\.inference\.chunks.0.text From 84a2735740edbafe95020f07c1f0b1940f9fa71c Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Tue, 9 Apr 2024 12:23:55 +0200 Subject: [PATCH 3/3] Add mapping incompatibility tests --- .../mapper/SemanticTextFieldMapper.java | 10 +- ...tic_text_inference_incompatible_models.yml | 190 ++++++++++++++++++ 2 files changed, 199 insertions(+), 1 deletion(-) create mode 100644 x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_incompatible_models.yml diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java index 2536825a9e0b7..08d11f7bd41f2 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldMapper.java @@ -217,7 +217,15 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio try { conflicts.check(); } catch (Exception exc) { - throw new DocumentParsingException(xContentLocation, "Incompatible model_settings", exc); + throw new DocumentParsingException( + xContentLocation, + "Incompatible model settings for field [" + + name() + + "]. Check that the " + + INFERENCE_ID_FIELD.getPreferredName() + + " is not using different model settings", + exc + ); } mapper = this; } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_incompatible_models.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_incompatible_models.yml new file mode 100644 index 0000000000000..48a73a02ef645 --- /dev/null +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference_incompatible_models.yml @@ -0,0 +1,190 @@ +setup: + - skip: + version: " - 8.12.99" + reason: semantic_text introduced in 8.13.0 # TODO change when 8.13.0 is released + + - do: + inference.put_model: + task_type: sparse_embedding + inference_id: sparse-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + - do: + inference.put_model: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + indices.create: + index: test-index + body: + mappings: + properties: + sparse_field: + type: semantic_text + inference_id: sparse-inference-id + dense_field: + type: semantic_text + inference_id: dense-inference-id + + # Index a doc to set mappings internally + - do: + index: + index: test-index + id: doc_1 + body: + dense_field: "inference test" + sparse_field: "another inference test" + +--- +"Fails for non-compatible dimensions": + + - do: + inference.delete_model: + task_type: text_embedding + inference_id: dense-inference-id + + - do: + inference.put_model: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 20, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + catch: /Incompatible model settings for field \[dense_field\].+/ + index: + index: test-index + id: doc_2 + body: + dense_field: "some other test" + +--- +"Fails for non-compatible similarity": + + - do: + inference.delete_model: + task_type: text_embedding + inference_id: dense-inference-id + + - do: + inference.put_model: + task_type: text_embedding + inference_id: dense-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "similarity": "dot_product", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + catch: /Incompatible model settings for field \[dense_field\].+/ + index: + index: test-index + id: doc_2 + body: + dense_field: "some other test" + +--- +"Fails for non-compatible task type for dense vectors": + + - do: + inference.delete_model: + task_type: text_embedding + inference_id: dense-inference-id + + + - do: + inference.put_model: + task_type: sparse_embedding + inference_id: dense-inference-id + body: > + { + "service": "test_service", + "service_settings": { + "model": "my_model", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + catch: /Incompatible model settings for field \[dense_field\].+/ + index: + index: test-index + id: doc_2 + body: + dense_field: "some other test" + +--- +"Fails for non-compatible task type for sparse vectors": + + - do: + inference.delete_model: + task_type: sparse_embedding + inference_id: sparse-inference-id + + - do: + inference.put_model: + task_type: text_embedding + inference_id: sparse-inference-id + body: > + { + "service": "text_embedding_test_service", + "service_settings": { + "model": "my_model", + "dimensions": 10, + "similarity": "cosine", + "api_key": "abc64" + }, + "task_settings": { + } + } + + - do: + catch: /Incompatible model settings for field \[sparse_field\].+/ + index: + index: test-index + id: doc_2 + body: + sparse_field: "some other test" + +