From 223e7f829bdc1b262344d98ed9deb565086475e3 Mon Sep 17 00:00:00 2001 From: Luca Cavanna Date: Wed, 17 Apr 2024 19:37:04 +0200 Subject: [PATCH] Avoid attempting to load the same empty field twice in fetch phase (#107551) During the fetch phase, there's a number of stored fields that are requested explicitly or loaded by default. That information is included in `StoredFieldsSpec` that each fetch sub phase exposes. We attempt to provide stored fields that are already loaded to the fields lookup that scripts as well as value fetchers use to load field values (via `SearchLookup`). This is done in `PreloadedFieldLookupProvider.` The current logic makes available values for fields that have been found, so that scripts or value fetchers that request them don't load them again ad-hoc. What happens though for stored fields that don't have a value for a specific doc, is that they are treated like any other field that was not requested, and loaded again, although they will not be found, which causes overhead. This change makes available to `PreloadedFieldLookupProvider` the list of required stored fields, so that it can better distinguish between fields that we already attempted to load (although we may not have found a value for them) and those that need to be loaded ad-hoc (for instance because a script is requesting them for the first time). This is an existing issue, that has become evident as we moved fetching of metadata fields to `FetchFieldsPhase`, that relies on value fetchers, and hence on `SearchLookup`. We end up attempting to load default metadata fields (`_ignored` and `_routing`) twice when they are not present in a document, which makes us call `LeafReader#storedFields` additional times for the same document providing a `SingleFieldVisitor` that will never find a value. Another existing issue that this PR fixes is for the `FetchFieldsPhase` to extend the `StoredFieldsSpec` that it exposes to include the metadata fields that the phase is now responsible for loading. That results in `_ignored` being included in the output of the debug stored fields section when profiling is enabled. The fact that it was previously missing is an existing bug (it was missing in `StoredFieldLoader#fieldsToLoad`). Yet another existing issues that this PR fixes is that `_id` has been until now always loaded on demand when requested via fetch fields or script. That is because it is not part of the preloaded stored fields that the fetch phase passes over to the `PreloadedFieldLookupProvider`. That causes overhead as the field has already been loaded, and should not be loaded once again when explicitly requested. --- docs/changelog/107551.yaml | 5 ++ docs/reference/search/profile.asciidoc | 4 +- .../rest-api-spec/test/30_inner_hits.yml | 2 +- .../rest-api-spec/test/search/370_profile.yml | 6 +- .../search/source/MetadataFetchingIT.java | 14 +++++ .../search/fetch/FetchPhase.java | 8 ++- .../fetch/PreloadedFieldLookupProvider.java | 39 +++++++++++-- .../fetch/subphase/FetchFieldsPhase.java | 5 +- .../search/lookup/FieldLookup.java | 4 +- .../PreloadedFieldLookupProviderTests.java | 16 +++++- .../fetch/subphase/FetchFieldsPhaseTests.java | 57 +++++++++++++++++++ 11 files changed, 141 insertions(+), 19 deletions(-) create mode 100644 docs/changelog/107551.yaml diff --git a/docs/changelog/107551.yaml b/docs/changelog/107551.yaml new file mode 100644 index 0000000000000..78e64cc526638 --- /dev/null +++ b/docs/changelog/107551.yaml @@ -0,0 +1,5 @@ +pr: 107551 +summary: Avoid attempting to load the same empty field twice in fetch phase +area: Search +type: bug +issues: [] diff --git a/docs/reference/search/profile.asciidoc b/docs/reference/search/profile.asciidoc index 3fed14231808c..48c65ed0abc7b 100644 --- a/docs/reference/search/profile.asciidoc +++ b/docs/reference/search/profile.asciidoc @@ -194,7 +194,7 @@ The API returns the following result: "load_source_count": 5 }, "debug": { - "stored_fields": ["_id", "_routing", "_source"] + "stored_fields": ["_id", "_ignored", "_routing", "_source"] }, "children": [ { @@ -1051,7 +1051,7 @@ And here is the fetch profile: "load_source_count": 5 }, "debug": { - "stored_fields": ["_id", "_routing", "_source"] + "stored_fields": ["_id", "_ignored", "_routing", "_source"] }, "children": [ { diff --git a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml index a561ebbae00e9..eff9a9beb35bc 100644 --- a/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml +++ b/modules/parent-join/src/yamlRestTest/resources/rest-api-spec/test/30_inner_hits.yml @@ -140,7 +140,7 @@ profile fetch: - gt: { profile.shards.0.fetch.breakdown.next_reader: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } - - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } + - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _ignored, _routing, _source] } - length: { profile.shards.0.fetch.children: 4 } - match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } - gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml index 200f7292291b1..dda3d14a5ae1d 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/370_profile.yml @@ -41,7 +41,7 @@ fetch fields: - gt: { profile.shards.0.fetch.breakdown.next_reader: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } - - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } + - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _ignored, _routing, _source] } - length: { profile.shards.0.fetch.children: 2 } - match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } - gt: { profile.shards.0.fetch.children.0.breakdown.next_reader_count: 0 } @@ -74,7 +74,7 @@ fetch source: - gt: { profile.shards.0.fetch.breakdown.next_reader: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } - - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } + - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _ignored, _routing, _source] } - length: { profile.shards.0.fetch.children: 3 } - match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } - match: { profile.shards.0.fetch.children.1.type: FetchSourcePhase } @@ -139,7 +139,7 @@ fetch nested source: - gt: { profile.shards.0.fetch.breakdown.next_reader: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields_count: 0 } - gt: { profile.shards.0.fetch.breakdown.load_stored_fields: 0 } - - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _routing, _source] } + - match: { profile.shards.0.fetch.debug.stored_fields: [_id, _ignored, _routing, _source] } - length: { profile.shards.0.fetch.children: 4 } - match: { profile.shards.0.fetch.children.0.type: FetchFieldsPhase } - match: { profile.shards.0.fetch.children.1.type: FetchSourcePhase } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/source/MetadataFetchingIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/source/MetadataFetchingIT.java index b8d1d45a6f85d..9e0dd984c9a2a 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/source/MetadataFetchingIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/source/MetadataFetchingIT.java @@ -168,4 +168,18 @@ public void testInvalid() { assertThat(exc.getMessage(), equalTo("cannot combine _none_ with other fields")); } } + + public void testFetchId() { + assertAcked(prepareCreate("test")); + ensureGreen(); + + prepareIndex("test").setId("1").setSource("field", "value").get(); + refresh(); + + assertResponse(prepareSearch("test").addFetchField("_id"), response -> { + assertEquals(1, response.getHits().getHits().length); + assertEquals("1", response.getHits().getAt(0).getId()); + assertEquals("1", response.getHits().getAt(0).field("_id").getValue()); + }); + } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 2fa3e903a0074..4b5c647da0c9a 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -112,9 +112,13 @@ private SearchHits buildSearchHits(SearchContext context, int[] docIdsToLoad, Pr context.getSearchExecutionContext().setLookupProviders(sourceProvider, ctx -> fieldLookupProvider); List processors = getProcessors(context.shardTarget(), fetchContext, profiler); - StoredFieldsSpec storedFieldsSpec = StoredFieldsSpec.build(processors, FetchSubPhaseProcessor::storedFieldsSpec); storedFieldsSpec = storedFieldsSpec.merge(new StoredFieldsSpec(false, false, sourceLoader.requiredStoredFields())); + // Ideally the required stored fields would be provided as constructor argument a few lines above, but that requires moving + // the getProcessors call to before the setLookupProviders call, which causes weird issues in InnerHitsPhase. + // setLookupProviders resets the SearchLookup used throughout the rest of the fetch phase, which StoredValueFetchers rely on + // to retrieve stored fields, and InnerHitsPhase is the last sub-fetch phase and re-runs the entire fetch phase. + fieldLookupProvider.setPreloadedStoredFieldNames(storedFieldsSpec.requiredStoredFields()); StoredFieldLoader storedFieldLoader = profiler.storedFields(StoredFieldLoader.fromSpec(storedFieldsSpec)); IdLoader idLoader = context.newIdLoader(); @@ -164,7 +168,7 @@ protected SearchHit nextDoc(int doc) throws IOException { leafIdLoader ); sourceProvider.source = hit.source(); - fieldLookupProvider.storedFields = hit.loadedFields(); + fieldLookupProvider.setPreloadedStoredFieldValues(hit.hit().getId(), hit.loadedFields()); for (FetchSubPhaseProcessor processor : processors) { processor.process(hit); } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java b/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java index 31cd74c878a0f..b335ce4aa2800 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java @@ -9,12 +9,16 @@ package org.elasticsearch.search.fetch; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.SetOnce; +import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.search.lookup.FieldLookup; import org.elasticsearch.search.lookup.LeafFieldLookupProvider; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Supplier; /** @@ -26,15 +30,22 @@ */ class PreloadedFieldLookupProvider implements LeafFieldLookupProvider { - Map> storedFields; - LeafFieldLookupProvider backUpLoader; - Supplier loaderSupplier; + private final SetOnce> preloadedStoredFieldNames = new SetOnce<>(); + private Map> preloadedStoredFieldValues; + private String id; + private LeafFieldLookupProvider backUpLoader; + private Supplier loaderSupplier; @Override public void populateFieldLookup(FieldLookup fieldLookup, int doc) throws IOException { String field = fieldLookup.fieldType().name(); - if (storedFields.containsKey(field)) { - fieldLookup.setValues(storedFields.get(field)); + + if (field.equals(IdFieldMapper.NAME)) { + fieldLookup.setValues(Collections.singletonList(id)); + return; + } + if (preloadedStoredFieldNames.get().contains(field)) { + fieldLookup.setValues(preloadedStoredFieldValues.get(field)); return; } // stored field not preloaded, go and get it directly @@ -44,8 +55,26 @@ public void populateFieldLookup(FieldLookup fieldLookup, int doc) throws IOExcep backUpLoader.populateFieldLookup(fieldLookup, doc); } + void setPreloadedStoredFieldNames(Set preloadedStoredFieldNames) { + this.preloadedStoredFieldNames.set(preloadedStoredFieldNames); + } + + void setPreloadedStoredFieldValues(String id, Map> preloadedStoredFieldValues) { + assert preloadedStoredFieldNames.get().containsAll(preloadedStoredFieldValues.keySet()) + : "Provided stored field that was not expected to be preloaded? " + + preloadedStoredFieldValues.keySet() + + " - " + + preloadedStoredFieldNames; + this.preloadedStoredFieldValues = preloadedStoredFieldValues; + this.id = id; + } + void setNextReader(LeafReaderContext ctx) { backUpLoader = null; loaderSupplier = () -> LeafFieldLookupProvider.fromStoredFields().apply(ctx); } + + LeafFieldLookupProvider getBackUpLoader() { + return backUpLoader; + } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhase.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhase.java index 882eb1cf9c75b..287c47505bf3a 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhase.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhase.java @@ -40,6 +40,7 @@ public final class FetchFieldsPhase implements FetchSubPhase { private static final List DEFAULT_METADATA_FIELDS = List.of( new FieldAndFormat(IgnoredFieldMapper.NAME, null), new FieldAndFormat(RoutingFieldMapper.NAME, null), + // will only be fetched when mapped (older archived indices) new FieldAndFormat(LegacyTypeFieldMapper.NAME, null) ); @@ -95,9 +96,9 @@ public void setNextReader(LeafReaderContext readerContext) { @Override public StoredFieldsSpec storedFieldsSpec() { if (fieldFetcher != null) { - return fieldFetcher.storedFieldsSpec(); + return metadataFieldFetcher.storedFieldsSpec().merge(fieldFetcher.storedFieldsSpec()); } - return StoredFieldsSpec.NO_REQUIREMENTS; + return metadataFieldFetcher.storedFieldsSpec(); } @Override diff --git a/server/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java b/server/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java index fa4eb8f21f78c..97ff1862c7852 100644 --- a/server/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java +++ b/server/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java @@ -31,7 +31,9 @@ public MappedFieldType fieldType() { */ public void setValues(List values) { assert valuesLoaded == false : "Call clear() before calling setValues()"; - values.stream().map(fieldType::valueForDisplay).forEach(this.values::add); + if (values != null) { + values.stream().map(fieldType::valueForDisplay).forEach(this.values::add); + } this.valuesLoaded = true; } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProviderTests.java b/server/src/test/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProviderTests.java index 85d9c32a1ee5b..13cdb01156f05 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProviderTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProviderTests.java @@ -13,11 +13,13 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.memory.MemoryIndex; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.index.mapper.IdFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.lookup.FieldLookup; import org.elasticsearch.test.ESTestCase; import java.io.IOException; +import java.util.Collections; import java.util.List; import java.util.Locale; import java.util.Map; @@ -30,7 +32,16 @@ public class PreloadedFieldLookupProviderTests extends ESTestCase { public void testFallback() throws IOException { PreloadedFieldLookupProvider lookup = new PreloadedFieldLookupProvider(); - lookup.storedFields = Map.of("foo", List.of("bar")); + lookup.setPreloadedStoredFieldNames(Collections.singleton("foo")); + lookup.setPreloadedStoredFieldValues("id", Map.of("foo", List.of("bar"))); + + MappedFieldType idFieldType = mock(MappedFieldType.class); + when(idFieldType.name()).thenReturn(IdFieldMapper.NAME); + when(idFieldType.valueForDisplay(any())).then(invocation -> (invocation.getArguments()[0])); + FieldLookup idFieldLookup = new FieldLookup(idFieldType); + lookup.populateFieldLookup(idFieldLookup, 0); + assertEquals("id", idFieldLookup.getValue()); + assertNull(lookup.getBackUpLoader()); // fallback didn't get used because 'foo' is in the list MappedFieldType fieldType = mock(MappedFieldType.class); when(fieldType.name()).thenReturn("foo"); @@ -39,7 +50,7 @@ public void testFallback() throws IOException { lookup.populateFieldLookup(fieldLookup, 0); assertEquals("BAR", fieldLookup.getValue()); - assertNull(lookup.backUpLoader); // fallback didn't get used because 'foo' is in the list + assertNull(lookup.getBackUpLoader()); // fallback didn't get used because 'foo' is in the list MappedFieldType unloadedFieldType = mock(MappedFieldType.class); when(unloadedFieldType.name()).thenReturn("unloaded"); @@ -56,5 +67,4 @@ public void testFallback() throws IOException { lookup.populateFieldLookup(unloadedFieldLookup, 0); assertEquals("VALUE", unloadedFieldLookup.getValue()); } - } diff --git a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhaseTests.java b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhaseTests.java index 39e73837c83ea..3a7460c05ca87 100644 --- a/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhaseTests.java +++ b/server/src/test/java/org/elasticsearch/search/fetch/subphase/FetchFieldsPhaseTests.java @@ -19,6 +19,7 @@ import org.elasticsearch.index.mapper.DocValueFetcher; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.NestedLookup; +import org.elasticsearch.index.mapper.StoredValueFetcher; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.DocValueFormat; import org.elasticsearch.search.SearchHit; @@ -26,6 +27,9 @@ import org.elasticsearch.search.fetch.FetchContext; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.FetchSubPhaseProcessor; +import org.elasticsearch.search.fetch.StoredFieldsContext; +import org.elasticsearch.search.fetch.StoredFieldsSpec; +import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.search.lookup.Source; import org.elasticsearch.test.ESTestCase; @@ -35,6 +39,7 @@ import java.util.Set; import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.eq; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -90,6 +95,58 @@ public void testDocValueFetcher() throws IOException { reader.close(); dir.close(); + } + + public void testStoredFieldsSpec() { + StoredFieldsContext storedFieldsContext = StoredFieldsContext.fromList(List.of("stored", "_metadata")); + FetchFieldsContext ffc = new FetchFieldsContext(List.of(new FieldAndFormat("field", null))); + + SearchLookup searchLookup = mock(SearchLookup.class); + + SearchExecutionContext sec = mock(SearchExecutionContext.class); + when(sec.isMetadataField(any())).then(invocation -> invocation.getArguments()[0].toString().startsWith("_")); + + MappedFieldType routingFt = mock(MappedFieldType.class); + when(routingFt.valueFetcher(any(), any())).thenReturn(new StoredValueFetcher(searchLookup, "_routing")); + when(sec.getFieldType(eq("_routing"))).thenReturn(routingFt); + + // this would normally not be mapped -> getMatchingFieldsNames would not resolve it (unless for older archive indices) + MappedFieldType typeFt = mock(MappedFieldType.class); + when(typeFt.valueFetcher(any(), any())).thenReturn(new StoredValueFetcher(searchLookup, "_type")); + when(sec.getFieldType(eq("_type"))).thenReturn(typeFt); + + MappedFieldType ignoredFt = mock(MappedFieldType.class); + when(ignoredFt.valueFetcher(any(), any())).thenReturn(new StoredValueFetcher(searchLookup, "_ignored")); + when(sec.getFieldType(eq("_ignored"))).thenReturn(ignoredFt); + + // Ideally we would test that explicitly requested stored fields are included in stored fields spec, but isStored is final hence it + // can't be mocked. In reality, _metadata would be included but stored would not. + MappedFieldType storedFt = mock(MappedFieldType.class); + when(sec.getFieldType(eq("stored"))).thenReturn(storedFt); + MappedFieldType metadataFt = mock(MappedFieldType.class); + when(sec.getFieldType(eq("_metadata"))).thenReturn(metadataFt); + + MappedFieldType fieldType = mock(MappedFieldType.class); + when(fieldType.valueFetcher(any(), any())).thenReturn( + new DocValueFetcher( + DocValueFormat.RAW, + new SortedNumericIndexFieldData("field", IndexNumericFieldData.NumericType.LONG, CoreValuesSourceType.NUMERIC, null, false) + ) + ); + when(sec.getFieldType(eq("field"))).thenReturn(fieldType); + when(sec.getMatchingFieldNames(any())).then(invocation -> Set.of(invocation.getArguments()[0])); + when(sec.nestedLookup()).thenReturn(NestedLookup.EMPTY); + FetchContext fetchContext = mock(FetchContext.class); + when(fetchContext.fetchFieldsContext()).thenReturn(ffc); + when(fetchContext.storedFieldsContext()).thenReturn(storedFieldsContext); + when(fetchContext.getSearchExecutionContext()).thenReturn(sec); + FetchFieldsPhase fetchFieldsPhase = new FetchFieldsPhase(); + FetchSubPhaseProcessor processor = fetchFieldsPhase.getProcessor(fetchContext); + StoredFieldsSpec storedFieldsSpec = processor.storedFieldsSpec(); + assertEquals(3, storedFieldsSpec.requiredStoredFields().size()); + assertTrue(storedFieldsSpec.requiredStoredFields().contains("_routing")); + assertTrue(storedFieldsSpec.requiredStoredFields().contains("_ignored")); + assertTrue(storedFieldsSpec.requiredStoredFields().contains("_type")); } }