From 0e21a562b2ba412e18715131a3b1f7e12203f125 Mon Sep 17 00:00:00 2001 From: Oleksandr Kolomiiets Date: Wed, 23 Oct 2024 13:22:26 -0700 Subject: [PATCH] Apply workaround for synthetic source of object arrays inside nested objects (#115275) --- rest-api-spec/build.gradle | 1 + .../21_synthetic_source_stored.yml | 11 ++- .../index/mapper/DocumentParser.java | 6 +- .../index/mapper/DocumentParserContext.java | 39 +++++--- .../mapper/IgnoredSourceFieldMapper.java | 3 + .../index/mapper/MapperFeatures.java | 3 +- .../mapper/IgnoredSourceFieldMapperTests.java | 88 +++++++++++++++++++ 7 files changed, 132 insertions(+), 19 deletions(-) diff --git a/rest-api-spec/build.gradle b/rest-api-spec/build.gradle index 7525ff2dc12d2..4bd293f0a8641 100644 --- a/rest-api-spec/build.gradle +++ b/rest-api-spec/build.gradle @@ -59,4 +59,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task -> task.replaceValueInMatch("profile.shards.0.dfs.knn.0.query.0.description", "DocAndScoreQuery[0,...][0.009673266,...],0.009673266", "dfs knn vector profiling with vector_operations_count") task.skipTest("indices.sort/10_basic/Index Sort", "warning does not exist for compatibility") task.skipTest("search/330_fetch_fields/Test search rewrite", "warning does not exist for compatibility") + task.skipTest("indices.create/21_synthetic_source_stored/object param - nested object with stored array", "temporary until backported") }) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml index eab51427876aa..6a4e92f694220 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/21_synthetic_source_stored.yml @@ -319,8 +319,8 @@ object param - nested object array next to other fields: --- object param - nested object with stored array: - requires: - cluster_features: ["mapper.synthetic_source_keep", "mapper.bwc_workaround_9_0"] - reason: requires tracking ignored source + cluster_features: ["mapper.ignored_source.always_store_object_arrays_in_nested", "mapper.bwc_workaround_9_0"] + reason: requires fix to object array handling - do: indices.create: @@ -356,8 +356,11 @@ object param - nested object with stored array: sort: name - match: { hits.total.value: 2 } - match: { hits.hits.0._source.name: A } - - match: { hits.hits.0._source.nested_array_regular.0.b.c: [ 10, 100] } - - match: { hits.hits.0._source.nested_array_regular.1.b.c: [ 20, 200] } + # due to a workaround for #115261 + - match: { hits.hits.0._source.nested_array_regular.0.b.0.c: 10 } + - match: { hits.hits.0._source.nested_array_regular.0.b.1.c: 100 } + - match: { hits.hits.0._source.nested_array_regular.1.b.0.c: 20 } + - match: { hits.hits.0._source.nested_array_regular.1.b.1.c: 200 } - match: { hits.hits.1._source.name: B } - match: { hits.hits.1._source.nested_array_stored.0.b.0.c: 10 } - match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index bac987a3df96d..1ed0a117ddd89 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -810,8 +810,10 @@ private static void parseNonDynamicArray( boolean objectWithFallbackSyntheticSource = false; if (mapper instanceof ObjectMapper objectMapper) { mode = getSourceKeepMode(context, objectMapper.sourceKeepMode()); - objectWithFallbackSyntheticSource = (mode == Mapper.SourceKeepMode.ALL - || (mode == Mapper.SourceKeepMode.ARRAYS && objectMapper instanceof NestedObjectMapper == false)); + objectWithFallbackSyntheticSource = mode == Mapper.SourceKeepMode.ALL + // Inside nested objects we always store object arrays as a workaround for #115261. + || ((context.inNestedScope() || mode == Mapper.SourceKeepMode.ARRAYS) + && objectMapper instanceof NestedObjectMapper == false); } boolean fieldWithFallbackSyntheticSource = false; boolean fieldWithStoredArraySource = false; diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java index ef87ce52fbabf..3b1f1a6d2809a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParserContext.java @@ -104,6 +104,16 @@ public int get() { } } + /** + * Defines the scope parser is currently in. + * This is used for synthetic source related logic during parsing. + */ + private enum Scope { + SINGLETON, + ARRAY, + NESTED + } + private final MappingLookup mappingLookup; private final MappingParserContext mappingParserContext; private final SourceToParse sourceToParse; @@ -112,7 +122,7 @@ public int get() { private final List ignoredFieldValues; private final List ignoredFieldsMissingValues; private boolean inArrayScopeEnabled; - private boolean inArrayScope; + private Scope currentScope; private final Map> dynamicMappers; private final DynamicMapperSize dynamicMappersSize; @@ -145,7 +155,7 @@ private DocumentParserContext( List ignoredFieldValues, List ignoredFieldsWithNoSource, boolean inArrayScopeEnabled, - boolean inArrayScope, + Scope currentScope, Map> dynamicMappers, Map dynamicObjectMappers, Map> dynamicRuntimeFields, @@ -167,7 +177,7 @@ private DocumentParserContext( this.ignoredFieldValues = ignoredFieldValues; this.ignoredFieldsMissingValues = ignoredFieldsWithNoSource; this.inArrayScopeEnabled = inArrayScopeEnabled; - this.inArrayScope = inArrayScope; + this.currentScope = currentScope; this.dynamicMappers = dynamicMappers; this.dynamicObjectMappers = dynamicObjectMappers; this.dynamicRuntimeFields = dynamicRuntimeFields; @@ -192,7 +202,7 @@ private DocumentParserContext(ObjectMapper parent, ObjectMapper.Dynamic dynamic, in.ignoredFieldValues, in.ignoredFieldsMissingValues, in.inArrayScopeEnabled, - in.inArrayScope, + in.currentScope, in.dynamicMappers, in.dynamicObjectMappers, in.dynamicRuntimeFields, @@ -224,7 +234,7 @@ protected DocumentParserContext( new ArrayList<>(), new ArrayList<>(), mappingParserContext.getIndexSettings().isSyntheticSourceSecondDocParsingPassEnabled(), - false, + Scope.SINGLETON, new HashMap<>(), new HashMap<>(), new HashMap<>(), @@ -335,7 +345,7 @@ public final void deduplicateIgnoredFieldValues(final Set fullNames) { public final DocumentParserContext addIgnoredFieldFromContext(IgnoredSourceFieldMapper.NameValue ignoredFieldWithNoSource) throws IOException { if (canAddIgnoredField()) { - if (inArrayScope) { + if (currentScope == Scope.ARRAY) { // The field is an array within an array, store all sub-array elements. ignoredFieldsMissingValues.add(ignoredFieldWithNoSource); return cloneWithRecordedSource(); @@ -379,10 +389,10 @@ public final DocumentParserContext maybeCloneForArray(Mapper mapper) throws IOEx if (canAddIgnoredField() && mapper instanceof ObjectMapper && mapper instanceof NestedObjectMapper == false - && inArrayScope == false + && currentScope != Scope.ARRAY && inArrayScopeEnabled) { DocumentParserContext subcontext = switchParser(parser()); - subcontext.inArrayScope = true; + subcontext.currentScope = Scope.ARRAY; return subcontext; } return this; @@ -673,6 +683,10 @@ public boolean isWithinCopyTo() { return false; } + public boolean inNestedScope() { + return currentScope == Scope.NESTED; + } + public final DocumentParserContext createChildContext(ObjectMapper parent) { return new Wrapper(parent, this); } @@ -716,10 +730,11 @@ public LuceneDocument doc() { return document; } }; - // Disable tracking array scopes for ignored source, as it would be added to the parent doc. - // Nested documents are added to preserve object structure within arrays of objects, so the use - // of ignored source for arrays inside them should be mostly redundant. - cloned.inArrayScope = false; + + cloned.currentScope = Scope.NESTED; + // Disable using second parsing pass since it currently can not determine which parts + // of source belong to which nested document. + // See #115261. cloned.inArrayScopeEnabled = false; return cloned; } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index 296c2c5311d9a..70d73fc2ffb9a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -58,6 +58,9 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { static final NodeFeature TRACK_IGNORED_SOURCE = new NodeFeature("mapper.track_ignored_source"); static final NodeFeature DONT_EXPAND_DOTS_IN_IGNORED_SOURCE = new NodeFeature("mapper.ignored_source.dont_expand_dots"); + static final NodeFeature ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS = new NodeFeature( + "mapper.ignored_source.always_store_object_arrays_in_nested" + ); /* Setting to disable encoding and writing values for this field. diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 31c89b2fc8ad4..026c7c98d7aeb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -62,7 +62,8 @@ public Set getTestFeatures() { return Set.of( RangeFieldMapper.DATE_RANGE_INDEXING_FIX, IgnoredSourceFieldMapper.DONT_EXPAND_DOTS_IN_IGNORED_SOURCE, - SourceFieldMapper.REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION + SourceFieldMapper.REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION, + IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS ); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 934744ef3ef96..7a4ce8bcb03fa 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -962,6 +962,94 @@ public void testArrayWithNestedObjects() throws IOException { {"path":{"to":[{"id":[1,20,3]},{"id":10},{"id":0}]}}""", syntheticSource); } + public void testObjectArrayWithinNestedObjects() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").startObject("properties"); + { + b.startObject("to").field("type", "nested").startObject("properties"); + { + b.startObject("obj").startObject("properties"); + { + b.startObject("id").field("type", "integer").field("synthetic_source_keep", "arrays").endObject(); + } + b.endObject().endObject(); + } + b.endObject().endObject(); + } + b.endObject().endObject(); + })).documentMapper(); + + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("path"); + { + b.startObject("to"); + { + b.startArray("obj"); + { + b.startObject().array("id", 1, 20, 3).endObject(); + b.startObject().field("id", 10).endObject(); + } + b.endArray(); + } + b.endObject(); + } + b.endObject(); + }); + assertEquals(""" + {"path":{"to":{"obj":[{"id":[1,20,3]},{"id":10}]}}}""", syntheticSource); + } + + public void testObjectArrayWithinNestedObjectsArray() throws IOException { + DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { + b.startObject("path").startObject("properties"); + { + b.startObject("to").field("type", "nested").startObject("properties"); + { + b.startObject("obj").startObject("properties"); + { + b.startObject("id").field("type", "integer").field("synthetic_source_keep", "arrays").endObject(); + } + b.endObject().endObject(); + } + b.endObject().endObject(); + } + b.endObject().endObject(); + })).documentMapper(); + + var syntheticSource = syntheticSource(documentMapper, b -> { + b.startObject("path"); + { + b.startArray("to"); + { + b.startObject(); + { + b.startArray("obj"); + { + b.startObject().array("id", 1, 20, 3).endObject(); + b.startObject().field("id", 10).endObject(); + } + b.endArray(); + } + b.endObject(); + b.startObject(); + { + b.startArray("obj"); + { + b.startObject().array("id", 200, 300, 500).endObject(); + b.startObject().field("id", 100).endObject(); + } + b.endArray(); + } + b.endObject(); + } + b.endArray(); + } + b.endObject(); + }); + assertEquals(""" + {"path":{"to":[{"obj":[{"id":[1,20,3]},{"id":10}]},{"obj":[{"id":[200,300,500]},{"id":100}]}]}}""", syntheticSource); + } + public void testArrayWithinArray() throws IOException { DocumentMapper documentMapper = createMapperService(syntheticSourceMapping(b -> { b.startObject("path");