From 6b262006970a3a4e53d135b0c837c41bbfd77e99 Mon Sep 17 00:00:00 2001 From: carlosdelest Date: Mon, 1 Apr 2024 20:34:53 +0200 Subject: [PATCH] Check that bulk updates use all source fields for updating a semantic_text field --- .../ShardBulkInferenceActionFilter.java | 15 +++--- .../inference/10_semantic_text_inference.yml | 46 ++++++++++++++++++- 2 files changed, 51 insertions(+), 10 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index 0fb75bddedc31..ebe480a68c3fd 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -388,10 +388,12 @@ private Map> createFieldInferenceRequests(Bu // item was already aborted/processed by a filter in the chain upstream (e.g. security) continue; } + boolean isUpdateRequest = false; final IndexRequest indexRequest; if (item.request() instanceof IndexRequest ir) { indexRequest = ir; } else if (item.request() instanceof UpdateRequest updateRequest) { + isUpdateRequest = true; if (updateRequest.script() != null) { addInferenceResponseFailure( item.id(), @@ -409,25 +411,20 @@ private Map> createFieldInferenceRequests(Bu continue; } final Map docMap = indexRequest.sourceAsMap(); - final Map inferenceMap = XContentMapValues.nodeMapValue( - docMap.computeIfAbsent(InferenceMetadataFieldMapper.NAME, k -> new LinkedHashMap()), - InferenceMetadataFieldMapper.NAME - ); for (var entry : fieldInferenceMap.values()) { String field = entry.getName(); String inferenceId = entry.getInferenceId(); for (var sourceField : entry.getSourceFields()) { - Object inferenceResult = inferenceMap.remove(field); var value = XContentMapValues.extractValue(sourceField, docMap); if (value == null) { - if (inferenceResult != null) { + if (isUpdateRequest) { addInferenceResponseFailure( item.id(), new ElasticsearchStatusException( - "The field [{}] is referenced in the [{}] metadata field but has no value", + "Field [{}] must be specified on an update request to calculate inference for field [{}]", RestStatus.BAD_REQUEST, - field, - InferenceMetadataFieldMapper.NAME + sourceField, + field ) ); } diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml index 2bde3af2777fe..08ba32d08bd62 100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml @@ -392,6 +392,9 @@ setup: source_field: type: text copy_to: inference_field + another_source_field: + type: text + copy_to: inference_field - do: index: @@ -400,6 +403,7 @@ setup: body: source_field: "copy_to inference test" inference_field: "inference test" + another_source_field: "another copy_to inference test" - do: get: @@ -407,8 +411,48 @@ setup: id: doc_1 - match: { _source.inference_field: "inference test" } - - length: {_source._inference.inference_field.chunks: 2} + - length: { _source._inference.inference_field.chunks: 3 } - exists: _source._inference.inference_field.chunks.0.inference - exists: _source._inference.inference_field.chunks.0.text - exists: _source._inference.inference_field.chunks.1.inference - exists: _source._inference.inference_field.chunks.1.text + - exists: _source._inference.inference_field.chunks.2.inference + - exists: _source._inference.inference_field.chunks.2.text + + +--- +"semantic_text copy_to needs values for every source field for updates": + - do: + indices.create: + index: test-copy-to-index + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + source_field: + type: text + copy_to: inference_field + another_source_field: + type: text + copy_to: inference_field + + # Not every source field needed on creation + - do: + index: + index: test-copy-to-index + id: doc_1 + body: + source_field: "a single source field provided" + inference_field: "inference test" + + # Every source field needed on bulk updates + - do: + bulk: + body: + - '{"update": {"_index": "test-copy-to-index", "_id": "doc_1"}}' + - '{"doc": {"source_field": "a single source field is kept as provided via bulk", "inference_field": "updated inference test" }}' + + - match: { items.0.update.status: 400 } + - match: { items.0.update.error.reason: "Field [another_source_field] must be specified on an update request to calculate inference for field [inference_field]" }