diff --git a/server/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java index 5eacfe6f2e3ab..33341e6b36987 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/CopyToMapperTests.java @@ -22,6 +22,7 @@ import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; import static org.hamcrest.Matchers.equalTo; @@ -106,6 +107,12 @@ public void testCopyToFieldsParsing() throws Exception { fieldMapper = mapperService.documentMapper().mappers().getMapper("new_field"); assertThat(fieldMapper.typeName(), equalTo("long")); + + MappingLookup mappingLookup = mapperService.mappingLookup(); + assertThat(mappingLookup.sourcePaths("another_field"), equalTo(Set.of("copy_test", "int_to_str_test", "another_field"))); + assertThat(mappingLookup.sourcePaths("new_field"), equalTo(Set.of("new_field", "int_to_str_test"))); + assertThat(mappingLookup.sourcePaths("copy_test"), equalTo(Set.of("copy_test", "cyclic_test"))); + assertThat(mappingLookup.sourcePaths("cyclic_test"), equalTo(Set.of("cyclic_test", "copy_test"))); } public void testCopyToFieldsInnerObjectParsing() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java index d7df41131414e..6446033c07c5b 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/MultiFieldTests.java @@ -224,6 +224,9 @@ public void testSourcePathFields() throws IOException { final Set fieldsUsingSourcePath = new HashSet<>(); ((FieldMapper) mapper).sourcePathUsedBy().forEachRemaining(mapper1 -> fieldsUsingSourcePath.add(mapper1.name())); 
assertThat(fieldsUsingSourcePath, equalTo(Set.of("field.subfield1", "field.subfield2"))); + + assertThat(mapperService.mappingLookup().sourcePaths("field.subfield1"), equalTo(Set.of("field"))); + assertThat(mapperService.mappingLookup().sourcePaths("field.subfield2"), equalTo(Set.of("field"))); } public void testUnknownLegacyFieldsUnderKnownRootField() throws Exception { diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index fef62051a6471..2e6f66c64fa95 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -388,10 +388,12 @@ private Map> createFieldInferenceRequests(Bu // item was already aborted/processed by a filter in the chain upstream (e.g. 
security) continue; } + boolean isUpdateRequest = false; final IndexRequest indexRequest; if (item.request() instanceof IndexRequest ir) { indexRequest = ir; } else if (item.request() instanceof UpdateRequest updateRequest) { + isUpdateRequest = true; if (updateRequest.script() != null) { addInferenceResponseFailure( item.id(), @@ -417,35 +419,50 @@ private Map> createFieldInferenceRequests(Bu String field = entry.getName(); String inferenceId = entry.getInferenceId(); Object inferenceResult = inferenceMap.remove(field); - var value = XContentMapValues.extractValue(field, docMap); - if (value == null) { - if (inferenceResult != null) { + for (var sourceField : entry.getSourceFields()) { + var value = XContentMapValues.extractValue(sourceField, docMap); + if (value == null) { + if (isUpdateRequest) { + addInferenceResponseFailure( + item.id(), + new ElasticsearchStatusException( + "Field [{}] must be specified on an update request to calculate inference for field [{}]", + RestStatus.BAD_REQUEST, + sourceField, + field + ) + ); + } else if (inferenceResult != null) { + addInferenceResponseFailure( + item.id(), + new ElasticsearchStatusException( + "The field [{}] is referenced in the [{}] metadata field but has no value", + RestStatus.BAD_REQUEST, + field, + InferenceMetadataFieldMapper.NAME + ) + ); + } + continue; + } + ensureResponseAccumulatorSlot(item.id()); + if (value instanceof String valueStr) { + List fieldRequests = fieldRequestsMap.computeIfAbsent( + inferenceId, + k -> new ArrayList<>() + ); + fieldRequests.add(new FieldInferenceRequest(item.id(), field, valueStr)); + } else { addInferenceResponseFailure( item.id(), new ElasticsearchStatusException( - "The field [{}] is referenced in the [{}] metadata field but has no value", + "Invalid format for field [{}], expected [String] got [{}]", RestStatus.BAD_REQUEST, field, - InferenceMetadataFieldMapper.NAME + value.getClass().getSimpleName() ) ); } - continue; - } - ensureResponseAccumulatorSlot(item.id()); 
- if (value instanceof String valueStr) { - List fieldRequests = fieldRequestsMap.computeIfAbsent(inferenceId, k -> new ArrayList<>()); - fieldRequests.add(new FieldInferenceRequest(item.id(), field, valueStr)); - } else { - addInferenceResponseFailure( - item.id(), - new ElasticsearchStatusException( - "Invalid format for field [{}], expected [String] got [{}]", - RestStatus.BAD_REQUEST, - field, - value.getClass().getSimpleName() - ) - ); } } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/InferenceMetadataFieldMapper.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/InferenceMetadataFieldMapper.java index 702f686605e56..89d1037243aac 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/InferenceMetadataFieldMapper.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/InferenceMetadataFieldMapper.java @@ -348,6 +348,8 @@ private void parseResultsObject( } parser.nextToken(); fieldMapper.parse(context); + // Reset leaf object after parsing the field + context.path().setWithinLeafObject(true); } if (visited.containsAll(REQUIRED_SUBFIELDS) == false) { Set missingSubfields = REQUIRED_SUBFIELDS.stream() @@ -383,6 +385,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { return SourceLoader.SyntheticFieldLoader.NOTHING; } + @SuppressWarnings("unchecked") public static void applyFieldInference( Map inferenceMap, String field, @@ -407,11 +410,12 @@ public static void applyFieldInference( results.getWriteableName() ); } - Map fieldMap = new LinkedHashMap<>(); - fieldMap.put(INFERENCE_ID, model.getInferenceEntityId()); + + Map fieldMap = (Map) inferenceMap.computeIfAbsent(field, s -> new LinkedHashMap<>()); fieldMap.putAll(new SemanticTextModelSettings(model).asMap()); - fieldMap.put(CHUNKS, chunks); - inferenceMap.put(field, fieldMap); + List> fieldChunks = (List>) fieldMap.computeIfAbsent(CHUNKS, k 
-> new ArrayList<>()); + fieldChunks.addAll(chunks); + fieldMap.put(INFERENCE_ID, model.getInferenceEntityId()); } record SemanticTextMapperContext(MapperBuilderContext context, SemanticTextFieldMapper mapper) {} diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java index 4c1cc8fa38bb4..1c4a2f561ad4a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/cluster/metadata/SemanticTextClusterMetadataTests.java @@ -16,11 +16,15 @@ import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.xpack.inference.InferencePlugin; +import org.hamcrest.Matchers; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.List; +import static org.hamcrest.CoreMatchers.equalTo; + public class SemanticTextClusterMetadataTests extends ESSingleNodeTestCase { @Override @@ -36,7 +40,7 @@ public void testCreateIndexWithSemanticTextField() { assertEquals(indexService.getMetadata().getInferenceFields().get("field").getInferenceId(), "test_model"); } - public void testAddSemanticTextField() throws Exception { + public void testSingleSourceSemanticTextField() throws Exception { final IndexService indexService = createIndex("test", client().admin().indices().prepareCreate("test")); final MetadataMappingService mappingService = getInstanceFromNode(MetadataMappingService.class); final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); @@ -53,6 +57,45 @@ public void testAddSemanticTextField() throws Exception { assertEquals(resultingState.metadata().index("test").getInferenceFields().get("field").getInferenceId(), "test_model"); } + 
/** Puts a mapping in which the text fields copy_origin_1 and copy_origin_2 both copy_to the semantic_text field "semantic", then checks that the resulting index metadata records inference id "test_model" and the source fields semantic, copy_origin_1 and copy_origin_2 (in any order) for that field. */ public void testCopyToSemanticTextField() throws Exception { + final IndexService indexService = createIndex("test", client().admin().indices().prepareCreate("test")); + final MetadataMappingService mappingService = getInstanceFromNode(MetadataMappingService.class); + final MetadataMappingService.PutMappingExecutor putMappingExecutor = mappingService.new PutMappingExecutor(); + final ClusterService clusterService = getInstanceFromNode(ClusterService.class); + + final PutMappingClusterStateUpdateRequest request = new PutMappingClusterStateUpdateRequest(""" + { + "properties": { + "semantic": { + "type": "semantic_text", + "inference_id": "test_model" + }, + "copy_origin_1": { + "type": "text", + "copy_to": "semantic" + }, + "copy_origin_2": { + "type": "text", + "copy_to": "semantic" + } + } + } + """); + request.indices(new Index[] { indexService.index() }); + final var resultingState = ClusterStateTaskExecutorUtils.executeAndAssertSuccessful( + clusterService.state(), + putMappingExecutor, + singleTask(request) + ); + IndexMetadata indexMetadata = resultingState.metadata().index("test"); + InferenceFieldMetadata inferenceFieldMetadata = indexMetadata.getInferenceFields().get("semantic"); + assertThat(inferenceFieldMetadata.getInferenceId(), equalTo("test_model")); + assertThat( + Arrays.asList(inferenceFieldMetadata.getSourceFields()), + Matchers.containsInAnyOrder("semantic", "copy_origin_1", "copy_origin_2") + ); + } + private static List singleTask(PutMappingClusterStateUpdateRequest request) { return Collections.singletonList(new MetadataMappingService.PutMappingClusterStateUpdateTask(request, ActionListener.running(() -> { throw new AssertionError("task should not complete publication"); diff --git a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml index 8847fb7f7efc1..0a07a88d230ef 
100644 --- a/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml +++ b/x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/10_semantic_text_inference.yml @@ -377,3 +377,98 @@ setup: - match: { errors: true } - match: { items.0.update.status: 400 } - match: { items.0.update.error.reason: "Cannot apply update with a script on indices that contain [semantic_text] field(s)" } + +--- +"Fails when providing inference results and there is no value for field": + - do: + catch: /The field \[inference_field\] is referenced in the \[_inference\] metadata field but has no value/ + index: + index: test-sparse-index + id: doc_1 + body: + _inference: + inference_field: + chunks: + - text: "inference test" + inference: + "hello": 0.123 + + +--- +"semantic_text copy_to calculate inference for source fields": + - do: + indices.create: + index: test-copy-to-index + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + source_field: + type: text + copy_to: inference_field + another_source_field: + type: text + copy_to: inference_field + + - do: + index: + index: test-copy-to-index + id: doc_1 + body: + source_field: "copy_to inference test" + inference_field: "inference test" + another_source_field: "another copy_to inference test" + + - do: + get: + index: test-copy-to-index + id: doc_1 + + - match: { _source.inference_field: "inference test" } + - length: { _source._inference.inference_field.chunks: 3 } + - exists: _source._inference.inference_field.chunks.0.inference + - exists: _source._inference.inference_field.chunks.0.text + - exists: _source._inference.inference_field.chunks.1.inference + - exists: _source._inference.inference_field.chunks.1.text + - exists: _source._inference.inference_field.chunks.2.inference + - exists: _source._inference.inference_field.chunks.2.text + + +--- +"semantic_text copy_to needs values for every source 
field for updates": + - do: + indices.create: + index: test-copy-to-index + body: + mappings: + properties: + inference_field: + type: semantic_text + inference_id: dense-inference-id + source_field: + type: text + copy_to: inference_field + another_source_field: + type: text + copy_to: inference_field + + # Not every source field needed on creation + - do: + index: + index: test-copy-to-index + id: doc_1 + body: + source_field: "a single source field provided" + inference_field: "inference test" + + # Every source field needed on bulk updates + - do: + bulk: + body: + - '{"update": {"_index": "test-copy-to-index", "_id": "doc_1"}}' + - '{"doc": {"source_field": "a single source field is kept as provided via bulk", "inference_field": "updated inference test" }}' + + - match: { items.0.update.status: 400 } + - match: { items.0.update.error.reason: "Field [another_source_field] must be specified on an update request to calculate inference for field [inference_field]" }