From c3fd01d14ceb24f5de58d939cf6066e9de771ab3 Mon Sep 17 00:00:00 2001 From: Max Hniebergall <137079448+maxhniebergall@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:12:04 -0400 Subject: [PATCH 1/4] AwaitsFix: https://github.com/elastic/elasticsearch/issues/110591 --- muted-tests.yml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/muted-tests.yml b/muted-tests.yml index d46a9355c201f..79372be872928 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -4,7 +4,8 @@ tests: method: "testGuessIsDayFirstFromLocale" - class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" @@ -13,7 +14,8 @@ tests: method: "testTrainedModelInference" - class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" + method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale\ + \ dependent mappings / dates}" - class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" issue: "https://github.com/elastic/elasticsearch/issues/109189" method: "test {p0=esql/70_locale/Date format with Italian locale}" @@ -28,7 +30,8 @@ tests: method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" - method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" + method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling\ + \ (too complex pattern)}" - class: "org.elasticsearch.xpack.ml.integration.ClassificationHousePricingIT" issue: "https://github.com/elastic/elasticsearch/issues/101598" method: "testFeatureImportanceValues" @@ -95,8 +98,11 @@ tests: issue: "https://github.com/elastic/elasticsearch/issues/110408" method: "testCreateAndRestorePartialSearchableSnapshot" - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT - method: test {p0=search.vectors/41_knn_search_half_byte_quantized/Test create, merge, and search cosine} + method: test {p0=search.vectors/41_knn_search_half_byte_quantized/Test create, merge, + and search cosine} issue: https://github.com/elastic/elasticsearch/issues/109978 +- class: "org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT" + issue: "https://github.com/elastic/elasticsearch/issues/110591" # Examples: # From d05f97021cf5f1dea8cd54c2c42c261850e9c02a Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Mon, 8 Jul 2024 08:36:19 -0700 Subject: [PATCH 2/4] Fix MapperBuilderContext#isDataStream when used in dynamic mappers (#110554) --- docs/changelog/110554.yaml | 5 ++ .../index/mapper/DocumentParserContext.java | 2 +- .../mapper/DocumentParserContextTests.java | 52 +++++++++++++++++++ 3 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/110554.yaml diff --git a/docs/changelog/110554.yaml b/docs/changelog/110554.yaml new file mode 100644 index 0000000000000..8c0b896a4c979 --- /dev/null +++ b/docs/changelog/110554.yaml @@ -0,0 +1,5 @@ +pr: 110554 +summary: Fix `MapperBuilderContext#isDataStream` when used in dynamic mappers +area: "Mapping" +type: bug +issues: [] diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index d8fa2919b795f..248369b249007 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -673,7 +673,7 @@ public final MapperBuilderContext createDynamicMapperBuilderContext() { return new MapperBuilderContext( p, mappingLookup.isSourceSynthetic(), - false, + mappingLookup.isDataStreamTimestampFieldEnabled(), containsDimensions, dynamic, MergeReason.MAPPING_UPDATE, diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java index ab1c93cd98277..2826243e4c866 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DocumentParserContextTests.java @@ -11,7 +11,9 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentParser; +import org.elasticsearch.xcontent.XContentType; import org.elasticsearch.xcontent.json.JsonXContent; import java.io.IOException; @@ -81,4 +83,54 @@ public void testSwitchParser() throws IOException { assertEquals(parser, newContext.parser()); assertEquals("1", newContext.indexSettings().getSettings().get("index.mapping.total_fields.limit")); } + + public void testCreateDynamicMapperBuilderContextFromEmptyContext() throws IOException { + var resultFromEmptyParserContext = context.createDynamicMapperBuilderContext(); + + assertEquals("hey", resultFromEmptyParserContext.buildFullName("hey")); + assertFalse(resultFromEmptyParserContext.isSourceSynthetic()); + assertFalse(resultFromEmptyParserContext.isDataStream()); + assertFalse(resultFromEmptyParserContext.parentObjectContainsDimensions()); + assertEquals(ObjectMapper.Defaults.DYNAMIC, resultFromEmptyParserContext.getDynamic()); + assertEquals(MapperService.MergeReason.MAPPING_UPDATE, resultFromEmptyParserContext.getMergeReason()); + assertFalse(resultFromEmptyParserContext.isInNestedContext()); + } + + public void testCreateDynamicMapperBuilderContext() throws IOException { + var mapping = XContentBuilder.builder(XContentType.JSON.xContent()) + .startObject() + .startObject("_doc") + .startObject("_source") + .field("mode", "synthetic") + .endObject() + .startObject(DataStreamTimestampFieldMapper.NAME) + .field("enabled", "true") + .endObject() + .startObject("properties") + .startObject(DataStreamTimestampFieldMapper.DEFAULT_PATH) + .field("type", "date") + .endObject() + .startObject("foo") + .field("type", "passthrough") + .field("time_series_dimension", "true") + .field("priority", "100") + .endObject() + .endObject() + .endObject() + .endObject(); + var documentMapper = new MapperServiceTestCase() { + }.createDocumentMapper(mapping); + var parserContext = new TestDocumentParserContext(documentMapper.mappers(), null); + parserContext.path().add("foo"); + + var resultFromParserContext = parserContext.createDynamicMapperBuilderContext(); + + assertEquals("foo.hey", resultFromParserContext.buildFullName("hey")); + assertTrue(resultFromParserContext.isSourceSynthetic()); + assertTrue(resultFromParserContext.isDataStream()); + assertTrue(resultFromParserContext.parentObjectContainsDimensions()); + assertEquals(ObjectMapper.Defaults.DYNAMIC, resultFromParserContext.getDynamic()); + assertEquals(MapperService.MergeReason.MAPPING_UPDATE, resultFromParserContext.getMergeReason()); + assertFalse(resultFromParserContext.isInNestedContext()); + } } From 930ff47c2f7388b5cf6d0a3235256f7d91394e45 Mon Sep 17 00:00:00 2001 From: Tim Grein Date: Mon, 8 Jul 2024 17:37:06 +0200 Subject: [PATCH 3/4] [Inference API] Use extractOptionalPositiveInteger in MistralEmbeddingsServiceSettings for dims and maxInputTokens (#110485) --- .../MistralEmbeddingsServiceSettings.java | 3 +- ...MistralEmbeddingsServiceSettingsTests.java | 80 +++++++++++++++++++ 2 files changed, 81 insertions(+), 2 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java index 62d06a4e0029c..2e4d546e1dc4c 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettings.java @@ -33,7 +33,6 @@ import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractOptionalPositiveInteger; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractRequiredString; import static org.elasticsearch.xpack.inference.services.ServiceUtils.extractSimilarity; -import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeAsType; import static org.elasticsearch.xpack.inference.services.mistral.MistralConstants.MODEL_FIELD; public class MistralEmbeddingsServiceSettings extends FilteredXContentObject implements ServiceSettings { @@ -67,7 +66,7 @@ public static MistralEmbeddingsServiceSettings fromMap(Map map, MistralService.NAME, context ); - Integer dims = removeAsType(map, DIMENSIONS, Integer.class); + Integer dims = extractOptionalPositiveInteger(map, DIMENSIONS, ModelConfigurations.SERVICE_SETTINGS, validationException); if (validationException.validationErrors().isEmpty() == false) { throw validationException; diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettingsTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettingsTests.java index 076986acdcee6..009a6dbdeb793 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettingsTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/mistral/embeddings/MistralEmbeddingsServiceSettingsTests.java @@ -8,6 +8,7 @@ package org.elasticsearch.xpack.inference.services.mistral.embeddings; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ValidationException; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.core.Nullable; @@ -27,6 +28,7 @@ import java.util.Map; import static org.elasticsearch.xpack.inference.services.ServiceFields.SIMILARITY; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.is; public class MistralEmbeddingsServiceSettingsTests extends ESTestCase { @@ -77,6 +79,84 @@ public void testFromMap_PersistentContext_DoesNotThrowException_WhenDimensionsIs assertThat(serviceSettings, is(new MistralEmbeddingsServiceSettings(model, null, null, null, null))); } + public void testFromMap_ThrowsException_WhenDimensionsAreZero() { + var model = "mistral-embed"; + var dimensions = 0; + + var settingsMap = createRequestSettingsMap(model, dimensions, null, SimilarityMeasure.COSINE); + + var thrownException = expectThrows( + ValidationException.class, + () -> MistralEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + assertThat( + thrownException.getMessage(), + containsString("Validation Failed: 1: [service_settings] Invalid value [0]. [dimensions] must be a positive integer;") + ); + } + + public void testFromMap_ThrowsException_WhenDimensionsAreNegative() { + var model = "mistral-embed"; + var dimensions = randomNegativeInt(); + + var settingsMap = createRequestSettingsMap(model, dimensions, null, SimilarityMeasure.COSINE); + + var thrownException = expectThrows( + ValidationException.class, + () -> MistralEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + assertThat( + thrownException.getMessage(), + containsString( + Strings.format( + "Validation Failed: 1: [service_settings] Invalid value [%d]. [dimensions] must be a positive integer;", + dimensions + ) + ) + ); + } + + public void testFromMap_ThrowsException_WhenMaxInputTokensAreZero() { + var model = "mistral-embed"; + var maxInputTokens = 0; + + var settingsMap = createRequestSettingsMap(model, null, maxInputTokens, SimilarityMeasure.COSINE); + + var thrownException = expectThrows( + ValidationException.class, + () -> MistralEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + assertThat( + thrownException.getMessage(), + containsString("Validation Failed: 1: [service_settings] Invalid value [0]. [max_input_tokens] must be a positive integer;") + ); + } + + public void testFromMap_ThrowsException_WhenMaxInputTokensAreNegative() { + var model = "mistral-embed"; + var maxInputTokens = randomNegativeInt(); + + var settingsMap = createRequestSettingsMap(model, null, maxInputTokens, SimilarityMeasure.COSINE); + + var thrownException = expectThrows( + ValidationException.class, + () -> MistralEmbeddingsServiceSettings.fromMap(settingsMap, ConfigurationParseContext.REQUEST) + ); + + assertThat( + thrownException.getMessage(), + containsString( + Strings.format( + "Validation Failed: 1: [service_settings] Invalid value [%d]. [max_input_tokens] must be a positive integer;", + maxInputTokens + ) + ) + ); + } + public void testFromMap_PersistentContext_DoesNotThrowException_WhenSimilarityIsPresent() { var model = "mistral-embed"; From b01949c6aa82a2ab56f13f01c34da3768a1a56fe Mon Sep 17 00:00:00 2001 From: David Kyle Date: Mon, 8 Jul 2024 17:22:59 +0100 Subject: [PATCH 4/4] [ML] Fixes processing chunked results in AWS Bedrock service (#110592) Fixes error using the Amazon Bedrock service with a large input that was chunked. --- .../amazonbedrock/AmazonBedrockService.java | 24 +------------------ .../azureopenai/AzureOpenAiService.java | 18 -------------- .../AmazonBedrockServiceTests.java | 21 +++++++++------- 3 files changed, 14 insertions(+), 49 deletions(-) diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java index dadcc8a40245e..459ca367058f8 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockService.java @@ -23,10 +23,6 @@ import org.elasticsearch.inference.ModelSecrets; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; -import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; import org.elasticsearch.xpack.inference.external.action.amazonbedrock.AmazonBedrockActionCreator; import org.elasticsearch.xpack.inference.external.amazonbedrock.AmazonBedrockRequestSender; @@ -47,7 +43,6 @@ import java.util.Set; import static org.elasticsearch.TransportVersions.ML_INFERENCE_AMAZON_BEDROCK_ADDED; -import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.parsePersistedConfigErrorMsg; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; @@ -115,10 +110,6 @@ protected void doChunkedInfer( TimeValue timeout, ActionListener> listener ) { - ActionListener inferListener = listener.delegateFailureAndWrap( - (delegate, response) -> delegate.onResponse(translateToChunkedResults(input, response)) - ); - var actionCreator = new AmazonBedrockActionCreator(amazonBedrockSender, this.getServiceComponents(), timeout); if (model instanceof AmazonBedrockModel baseAmazonBedrockModel) { var maxBatchSize = getEmbeddingsMaxBatchSize(baseAmazonBedrockModel.provider()); @@ -126,26 +117,13 @@ protected void doChunkedInfer( .batchRequestsWithListeners(listener); for (var request : batchedRequests) { var action = baseAmazonBedrockModel.accept(actionCreator, taskSettings); - action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, inferListener); + action.execute(new DocumentsOnlyInput(request.batch().inputs()), timeout, request.listener()); } } else { listener.onFailure(createInvalidModelException(model)); } } - private static List translateToChunkedResults( - List inputs, - InferenceServiceResults inferenceResults - ) { - if (inferenceResults instanceof InferenceTextEmbeddingFloatResults textEmbeddingResults) { - return InferenceChunkedTextEmbeddingFloatResults.listOf(inputs, textEmbeddingResults); - } else if (inferenceResults instanceof ErrorInferenceResults error) { - return List.of(new ErrorChunkedInferenceResults(error.getException())); - } else { - throw createInvalidChunkedResultException(InferenceTextEmbeddingFloatResults.NAME, inferenceResults.getWriteableName()); - } - } - @Override public String name() { return NAME; diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java index 3facb78864831..3c75243770f97 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiService.java @@ -24,10 +24,6 @@ import org.elasticsearch.inference.SimilarityMeasure; import org.elasticsearch.inference.TaskType; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.xpack.core.inference.results.ErrorChunkedInferenceResults; -import org.elasticsearch.xpack.core.inference.results.InferenceChunkedTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.inference.results.InferenceTextEmbeddingFloatResults; -import org.elasticsearch.xpack.core.ml.inference.results.ErrorInferenceResults; import org.elasticsearch.xpack.inference.chunking.EmbeddingRequestChunker; import org.elasticsearch.xpack.inference.external.action.azureopenai.AzureOpenAiActionCreator; import org.elasticsearch.xpack.inference.external.http.sender.DocumentsOnlyInput; @@ -44,7 +40,6 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.inference.results.ResultUtils.createInvalidChunkedResultException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.createInvalidModelException; import static org.elasticsearch.xpack.inference.services.ServiceUtils.parsePersistedConfigErrorMsg; import static org.elasticsearch.xpack.inference.services.ServiceUtils.removeFromMapOrDefaultEmpty; @@ -246,19 +241,6 @@ protected void doChunkedInfer( } } - private static List translateToChunkedResults( - List inputs, - InferenceServiceResults inferenceResults - ) { - if (inferenceResults instanceof InferenceTextEmbeddingFloatResults textEmbeddingResults) { - return InferenceChunkedTextEmbeddingFloatResults.listOf(inputs, textEmbeddingResults); - } else if (inferenceResults instanceof ErrorInferenceResults error) { - return List.of(new ErrorChunkedInferenceResults(error.getException())); - } else { - throw createInvalidChunkedResultException(InferenceTextEmbeddingFloatResults.NAME, inferenceResults.getWriteableName()); - } - } - /** * For text embedding models get the embedding size and * update the service settings. diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java index 00a840c8d4812..ae413fc17425c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java @@ -1048,13 +1048,18 @@ public void testChunkedInfer_CallsInfer_ConvertsFloatResponse_ForEmbeddings() th try (var service = new AmazonBedrockService(factory, amazonBedrockFactory, createWithEmptySettings(threadPool))) { try (var requestSender = (AmazonBedrockMockRequestSender) amazonBedrockFactory.createSender()) { - var mockResults = new InferenceTextEmbeddingFloatResults( - List.of( - new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F }), - new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.456F, 0.987F }) - ) - ); - requestSender.enqueue(mockResults); + { + var mockResults1 = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.123F, 0.678F })) + ); + requestSender.enqueue(mockResults1); + } + { + var mockResults2 = new InferenceTextEmbeddingFloatResults( + List.of(new InferenceTextEmbeddingFloatResults.InferenceFloatEmbedding(new float[] { 0.223F, 0.278F })) + ); + requestSender.enqueue(mockResults2); + } var model = AmazonBedrockEmbeddingsModelTests.createModel( "id", @@ -1089,7 +1094,7 @@ public void testChunkedInfer_CallsInfer_ConvertsFloatResponse_ForEmbeddings() th var floatResult = (InferenceChunkedTextEmbeddingFloatResults) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); assertEquals("xyz", floatResult.chunks().get(0).matchedText()); - assertArrayEquals(new float[] { 0.456F, 0.987F }, floatResult.chunks().get(0).embedding(), 0.0f); + assertArrayEquals(new float[] { 0.223F, 0.278F }, floatResult.chunks().get(0).embedding(), 0.0f); } } }